From 11413893a0d0b19df976eb9a2b600ba56802db2f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 4 Jan 2022 12:24:00 +0100 Subject: [PATCH 0001/1586] EDAC: Use proper list of struct attribute for attributes The EDAC sysfs code is doing some crazy casting of the list of attributes that is not necessary at all. Instead, properly point to the correct attribute structure in the lists, which removes the need to cast anything and the code is now properly typesafe (as much as sysfs attribute logic is typesafe...) Signed-off-by: Greg Kroah-Hartman Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220104112401.1067148-1-gregkh@linuxfoundation.org --- drivers/edac/edac_device_sysfs.c | 28 ++++++++++++++-------------- drivers/edac/edac_pci_sysfs.c | 24 ++++++++++++------------ 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/drivers/edac/edac_device_sysfs.c b/drivers/edac/edac_device_sysfs.c index 5e75937537997..f4aefced2ccf8 100644 --- a/drivers/edac/edac_device_sysfs.c +++ b/drivers/edac/edac_device_sysfs.c @@ -163,11 +163,11 @@ CTL_INFO_ATTR(poll_msec, S_IRUGO | S_IWUSR, edac_device_ctl_poll_msec_show, edac_device_ctl_poll_msec_store); /* Base Attributes of the EDAC_DEVICE ECC object */ -static struct ctl_info_attribute *device_ctrl_attr[] = { - &attr_ctl_info_panic_on_ue, - &attr_ctl_info_log_ue, - &attr_ctl_info_log_ce, - &attr_ctl_info_poll_msec, +static struct attribute *device_ctrl_attr[] = { + &attr_ctl_info_panic_on_ue.attr, + &attr_ctl_info_log_ue.attr, + &attr_ctl_info_log_ce.attr, + &attr_ctl_info_poll_msec.attr, NULL, }; @@ -217,7 +217,7 @@ static void edac_device_ctrl_master_release(struct kobject *kobj) static struct kobj_type ktype_device_ctrl = { .release = edac_device_ctrl_master_release, .sysfs_ops = &device_ctl_info_ops, - .default_attrs = (struct attribute **)device_ctrl_attr, + .default_attrs = device_ctrl_attr, }; /* @@ -389,9 +389,9 @@ INSTANCE_ATTR(ce_count, S_IRUGO, instance_ce_count_show, NULL); 
INSTANCE_ATTR(ue_count, S_IRUGO, instance_ue_count_show, NULL); /* list of edac_dev 'instance' attributes */ -static struct instance_attribute *device_instance_attr[] = { - &attr_instance_ce_count, - &attr_instance_ue_count, +static struct attribute *device_instance_attr[] = { + &attr_instance_ce_count.attr, + &attr_instance_ue_count.attr, NULL, }; @@ -399,7 +399,7 @@ static struct instance_attribute *device_instance_attr[] = { static struct kobj_type ktype_instance_ctrl = { .release = edac_device_ctrl_instance_release, .sysfs_ops = &device_instance_ops, - .default_attrs = (struct attribute **)device_instance_attr, + .default_attrs = device_instance_attr, }; /* edac_dev -> instance -> block information */ @@ -487,9 +487,9 @@ BLOCK_ATTR(ce_count, S_IRUGO, block_ce_count_show, NULL); BLOCK_ATTR(ue_count, S_IRUGO, block_ue_count_show, NULL); /* list of edac_dev 'block' attributes */ -static struct edac_dev_sysfs_block_attribute *device_block_attr[] = { - &attr_block_ce_count, - &attr_block_ue_count, +static struct attribute *device_block_attr[] = { + &attr_block_ce_count.attr, + &attr_block_ue_count.attr, NULL, }; @@ -497,7 +497,7 @@ static struct edac_dev_sysfs_block_attribute *device_block_attr[] = { static struct kobj_type ktype_block_ctrl = { .release = edac_device_ctrl_block_release, .sysfs_ops = &device_block_ops, - .default_attrs = (struct attribute **)device_block_attr, + .default_attrs = device_block_attr, }; /* block ctor/dtor code */ diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c index 53042af7262e2..c041fb8f3ef27 100644 --- a/drivers/edac/edac_pci_sysfs.c +++ b/drivers/edac/edac_pci_sysfs.c @@ -135,9 +135,9 @@ INSTANCE_ATTR(pe_count, S_IRUGO, instance_pe_count_show, NULL); INSTANCE_ATTR(npe_count, S_IRUGO, instance_npe_count_show, NULL); /* pci instance attributes */ -static struct instance_attribute *pci_instance_attr[] = { - &attr_instance_pe_count, - &attr_instance_npe_count, +static struct attribute *pci_instance_attr[] = { 
+ &attr_instance_pe_count.attr, + &attr_instance_npe_count.attr, NULL }; @@ -145,7 +145,7 @@ static struct instance_attribute *pci_instance_attr[] = { static struct kobj_type ktype_pci_instance = { .release = edac_pci_instance_release, .sysfs_ops = &pci_instance_ops, - .default_attrs = (struct attribute **)pci_instance_attr, + .default_attrs = pci_instance_attr, }; /* @@ -292,13 +292,13 @@ EDAC_PCI_ATTR(pci_parity_count, S_IRUGO, edac_pci_int_show, NULL); EDAC_PCI_ATTR(pci_nonparity_count, S_IRUGO, edac_pci_int_show, NULL); /* Base Attributes of the memory ECC object */ -static struct edac_pci_dev_attribute *edac_pci_attr[] = { - &edac_pci_attr_check_pci_errors, - &edac_pci_attr_edac_pci_log_pe, - &edac_pci_attr_edac_pci_log_npe, - &edac_pci_attr_edac_pci_panic_on_pe, - &edac_pci_attr_pci_parity_count, - &edac_pci_attr_pci_nonparity_count, +static struct attribute *edac_pci_attr[] = { + &edac_pci_attr_check_pci_errors.attr, + &edac_pci_attr_edac_pci_log_pe.attr, + &edac_pci_attr_edac_pci_log_npe.attr, + &edac_pci_attr_edac_pci_panic_on_pe.attr, + &edac_pci_attr_pci_parity_count.attr, + &edac_pci_attr_pci_nonparity_count.attr, NULL, }; @@ -327,7 +327,7 @@ static void edac_pci_release_main_kobj(struct kobject *kobj) static struct kobj_type ktype_edac_pci_main_kobj = { .release = edac_pci_release_main_kobj, .sysfs_ops = &edac_pci_sysfs_ops, - .default_attrs = (struct attribute **)edac_pci_attr, + .default_attrs = edac_pci_attr, }; /** -- GitLab From 625c6b55699777ece6b87235f579e2cfbde049ce Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 4 Jan 2022 12:24:01 +0100 Subject: [PATCH 0002/1586] EDAC: Use default_groups in kobj_type There are currently 2 ways to create a set of sysfs files for a kobj_type, through the default_attrs field, and the default_groups field. 
Move the edac sysfs code to use default_groups field which has been the preferred way since aa30f47cf666 ("kobject: Add support for default attribute groups to kobj_type") so that the obsolete default_attrs field can be removed soon. Signed-off-by: Greg Kroah-Hartman Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220104112401.1067148-2-gregkh@linuxfoundation.org --- drivers/edac/edac_device_sysfs.c | 15 +++++++++------ drivers/edac/edac_pci_sysfs.c | 10 ++++++---- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/edac/edac_device_sysfs.c b/drivers/edac/edac_device_sysfs.c index f4aefced2ccf8..9a61d92bdf420 100644 --- a/drivers/edac/edac_device_sysfs.c +++ b/drivers/edac/edac_device_sysfs.c @@ -163,13 +163,14 @@ CTL_INFO_ATTR(poll_msec, S_IRUGO | S_IWUSR, edac_device_ctl_poll_msec_show, edac_device_ctl_poll_msec_store); /* Base Attributes of the EDAC_DEVICE ECC object */ -static struct attribute *device_ctrl_attr[] = { +static struct attribute *device_ctrl_attrs[] = { &attr_ctl_info_panic_on_ue.attr, &attr_ctl_info_log_ue.attr, &attr_ctl_info_log_ce.attr, &attr_ctl_info_poll_msec.attr, NULL, }; +ATTRIBUTE_GROUPS(device_ctrl); /* * edac_device_ctrl_master_release @@ -217,7 +218,7 @@ static void edac_device_ctrl_master_release(struct kobject *kobj) static struct kobj_type ktype_device_ctrl = { .release = edac_device_ctrl_master_release, .sysfs_ops = &device_ctl_info_ops, - .default_attrs = device_ctrl_attr, + .default_groups = device_ctrl_groups, }; /* @@ -389,17 +390,18 @@ INSTANCE_ATTR(ce_count, S_IRUGO, instance_ce_count_show, NULL); INSTANCE_ATTR(ue_count, S_IRUGO, instance_ue_count_show, NULL); /* list of edac_dev 'instance' attributes */ -static struct attribute *device_instance_attr[] = { +static struct attribute *device_instance_attrs[] = { &attr_instance_ce_count.attr, &attr_instance_ue_count.attr, NULL, }; +ATTRIBUTE_GROUPS(device_instance); /* The 'ktype' for each edac_dev 'instance' */ static struct kobj_type 
ktype_instance_ctrl = { .release = edac_device_ctrl_instance_release, .sysfs_ops = &device_instance_ops, - .default_attrs = device_instance_attr, + .default_groups = device_instance_groups, }; /* edac_dev -> instance -> block information */ @@ -487,17 +489,18 @@ BLOCK_ATTR(ce_count, S_IRUGO, block_ce_count_show, NULL); BLOCK_ATTR(ue_count, S_IRUGO, block_ue_count_show, NULL); /* list of edac_dev 'block' attributes */ -static struct attribute *device_block_attr[] = { +static struct attribute *device_block_attrs[] = { &attr_block_ce_count.attr, &attr_block_ue_count.attr, NULL, }; +ATTRIBUTE_GROUPS(device_block); /* The 'ktype' for each edac_dev 'block' */ static struct kobj_type ktype_block_ctrl = { .release = edac_device_ctrl_block_release, .sysfs_ops = &device_block_ops, - .default_attrs = device_block_attr, + .default_groups = device_block_groups, }; /* block ctor/dtor code */ diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c index c041fb8f3ef27..888d5728ecef7 100644 --- a/drivers/edac/edac_pci_sysfs.c +++ b/drivers/edac/edac_pci_sysfs.c @@ -135,17 +135,18 @@ INSTANCE_ATTR(pe_count, S_IRUGO, instance_pe_count_show, NULL); INSTANCE_ATTR(npe_count, S_IRUGO, instance_npe_count_show, NULL); /* pci instance attributes */ -static struct attribute *pci_instance_attr[] = { +static struct attribute *pci_instance_attrs[] = { &attr_instance_pe_count.attr, &attr_instance_npe_count.attr, NULL }; +ATTRIBUTE_GROUPS(pci_instance); /* the ktype for a pci instance */ static struct kobj_type ktype_pci_instance = { .release = edac_pci_instance_release, .sysfs_ops = &pci_instance_ops, - .default_attrs = pci_instance_attr, + .default_groups = pci_instance_groups, }; /* @@ -292,7 +293,7 @@ EDAC_PCI_ATTR(pci_parity_count, S_IRUGO, edac_pci_int_show, NULL); EDAC_PCI_ATTR(pci_nonparity_count, S_IRUGO, edac_pci_int_show, NULL); /* Base Attributes of the memory ECC object */ -static struct attribute *edac_pci_attr[] = { +static struct attribute *edac_pci_attrs[] = { 
&edac_pci_attr_check_pci_errors.attr, &edac_pci_attr_edac_pci_log_pe.attr, &edac_pci_attr_edac_pci_log_npe.attr, @@ -301,6 +302,7 @@ static struct attribute *edac_pci_attr[] = { &edac_pci_attr_pci_nonparity_count.attr, NULL, }; +ATTRIBUTE_GROUPS(edac_pci); /* * edac_pci_release_main_kobj @@ -327,7 +329,7 @@ static void edac_pci_release_main_kobj(struct kobject *kobj) static struct kobj_type ktype_edac_pci_main_kobj = { .release = edac_pci_release_main_kobj, .sysfs_ops = &edac_pci_sysfs_ops, - .default_attrs = edac_pci_attr, + .default_groups = edac_pci_groups, }; /** -- GitLab From 4810dd2c943edd98cd41a12b96745b16b1d6b4f5 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 2 Dec 2021 11:50:24 +0200 Subject: [PATCH 0003/1586] perf/tests: Add AMX instructions to x86 instruction decoder test The x86 instruction decoder is used for both kernel instructions and user space instructions (e.g. uprobes, perf tools Intel PT), so it is good to update it with new instructions. Add AMX instructions to the x86 instruction decoder test. A subsequent patch adds the instructions to the instruction decoder. 
Reference: Intel Architecture Instruction Set Extensions and Future Features Programming Reference May 2021 Document Number: 319433-044 Example: $ INSN='ldtilecfg\|sttilecfg\|tdpbf16ps\|tdpbssd\|' $ INSN+='tdpbsud\|tdpbusd\|tdpbuud\|tileloadd\|' $ INSN+='tileloaddt1\|tilerelease\|tilestored\|tilezero' $ perf test -v "x86 instruction decoder" |& grep -i $INSN Failed to decode: c4 e2 78 49 04 c8 ldtilecfg (%rax,%rcx,8) Failed to decode: c4 c2 78 49 04 c8 ldtilecfg (%r8,%rcx,8) Failed to decode: c4 e2 79 49 04 c8 sttilecfg (%rax,%rcx,8) Failed to decode: c4 c2 79 49 04 c8 sttilecfg (%r8,%rcx,8) Failed to decode: c4 e2 7a 5c d1 tdpbf16ps %tmm0,%tmm1,%tmm2 Failed to decode: c4 e2 7b 5e d1 tdpbssd %tmm0,%tmm1,%tmm2 Failed to decode: c4 e2 7a 5e d1 tdpbsud %tmm0,%tmm1,%tmm2 Failed to decode: c4 e2 79 5e d1 tdpbusd %tmm0,%tmm1,%tmm2 Failed to decode: c4 e2 78 5e d1 tdpbuud %tmm0,%tmm1,%tmm2 Failed to decode: c4 e2 7b 4b 0c c8 tileloadd (%rax,%rcx,8),%tmm1 Failed to decode: c4 c2 7b 4b 14 c8 tileloadd (%r8,%rcx,8),%tmm2 Failed to decode: c4 e2 79 4b 0c c8 tileloaddt1 (%rax,%rcx,8),%tmm1 Failed to decode: c4 c2 79 4b 14 c8 tileloaddt1 (%r8,%rcx,8),%tmm2 Failed to decode: c4 e2 78 49 c0 tilerelease Failed to decode: c4 e2 7a 4b 0c c8 tilestored %tmm1,(%rax,%rcx,8) Failed to decode: c4 c2 7a 4b 14 c8 tilestored %tmm2,(%r8,%rcx,8) Failed to decode: c4 e2 7b 49 c0 tilezero %tmm0 Failed to decode: c4 e2 7b 49 f8 tilezero %tmm7 Signed-off-by: Adrian Hunter Signed-off-by: Borislav Petkov Acked-by: Arnaldo Carvalho de Melo Acked-by: Masami Hiramatsu Link: https://lore.kernel.org/r/20211202095029.2165714-2-adrian.hunter@intel.com --- tools/perf/arch/x86/tests/insn-x86-dat-64.c | 36 ++++++++++++++++++++ tools/perf/arch/x86/tests/insn-x86-dat-src.c | 21 ++++++++++++ 2 files changed, 57 insertions(+) diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-64.c b/tools/perf/arch/x86/tests/insn-x86-dat-64.c index 5da17d41d302b..3548565a1cc55 100644 --- 
a/tools/perf/arch/x86/tests/insn-x86-dat-64.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-64.c @@ -2459,6 +2459,42 @@ "3e f2 ff a4 c8 78 56 34 12 \tnotrack bnd jmpq *0x12345678(%rax,%rcx,8)",}, {{0x3e, 0xf2, 0x41, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "jmp", "indirect", "3e f2 41 ff a4 c8 78 56 34 12 \tnotrack bnd jmpq *0x12345678(%r8,%rcx,8)",}, +{{0xc4, 0xe2, 0x78, 0x49, 0x04, 0xc8, }, 6, 0, "", "", +"c4 e2 78 49 04 c8 \tldtilecfg (%rax,%rcx,8)",}, +{{0xc4, 0xc2, 0x78, 0x49, 0x04, 0xc8, }, 6, 0, "", "", +"c4 c2 78 49 04 c8 \tldtilecfg (%r8,%rcx,8)",}, +{{0xc4, 0xe2, 0x79, 0x49, 0x04, 0xc8, }, 6, 0, "", "", +"c4 e2 79 49 04 c8 \tsttilecfg (%rax,%rcx,8)",}, +{{0xc4, 0xc2, 0x79, 0x49, 0x04, 0xc8, }, 6, 0, "", "", +"c4 c2 79 49 04 c8 \tsttilecfg (%r8,%rcx,8)",}, +{{0xc4, 0xe2, 0x7a, 0x5c, 0xd1, }, 5, 0, "", "", +"c4 e2 7a 5c d1 \ttdpbf16ps %tmm0,%tmm1,%tmm2",}, +{{0xc4, 0xe2, 0x7b, 0x5e, 0xd1, }, 5, 0, "", "", +"c4 e2 7b 5e d1 \ttdpbssd %tmm0,%tmm1,%tmm2",}, +{{0xc4, 0xe2, 0x7a, 0x5e, 0xd1, }, 5, 0, "", "", +"c4 e2 7a 5e d1 \ttdpbsud %tmm0,%tmm1,%tmm2",}, +{{0xc4, 0xe2, 0x79, 0x5e, 0xd1, }, 5, 0, "", "", +"c4 e2 79 5e d1 \ttdpbusd %tmm0,%tmm1,%tmm2",}, +{{0xc4, 0xe2, 0x78, 0x5e, 0xd1, }, 5, 0, "", "", +"c4 e2 78 5e d1 \ttdpbuud %tmm0,%tmm1,%tmm2",}, +{{0xc4, 0xe2, 0x7b, 0x4b, 0x0c, 0xc8, }, 6, 0, "", "", +"c4 e2 7b 4b 0c c8 \ttileloadd (%rax,%rcx,8),%tmm1",}, +{{0xc4, 0xc2, 0x7b, 0x4b, 0x14, 0xc8, }, 6, 0, "", "", +"c4 c2 7b 4b 14 c8 \ttileloadd (%r8,%rcx,8),%tmm2",}, +{{0xc4, 0xe2, 0x79, 0x4b, 0x0c, 0xc8, }, 6, 0, "", "", +"c4 e2 79 4b 0c c8 \ttileloaddt1 (%rax,%rcx,8),%tmm1",}, +{{0xc4, 0xc2, 0x79, 0x4b, 0x14, 0xc8, }, 6, 0, "", "", +"c4 c2 79 4b 14 c8 \ttileloaddt1 (%r8,%rcx,8),%tmm2",}, +{{0xc4, 0xe2, 0x78, 0x49, 0xc0, }, 5, 0, "", "", +"c4 e2 78 49 c0 \ttilerelease ",}, +{{0xc4, 0xe2, 0x7a, 0x4b, 0x0c, 0xc8, }, 6, 0, "", "", +"c4 e2 7a 4b 0c c8 \ttilestored %tmm1,(%rax,%rcx,8)",}, +{{0xc4, 0xc2, 0x7a, 0x4b, 0x14, 0xc8, }, 6, 0, "", "", +"c4 c2 7a 
4b 14 c8 \ttilestored %tmm2,(%r8,%rcx,8)",}, +{{0xc4, 0xe2, 0x7b, 0x49, 0xc0, }, 5, 0, "", "", +"c4 e2 7b 49 c0 \ttilezero %tmm0",}, +{{0xc4, 0xe2, 0x7b, 0x49, 0xf8, }, 5, 0, "", "", +"c4 e2 7b 49 f8 \ttilezero %tmm7",}, {{0x0f, 0x01, 0xcf, }, 3, 0, "", "", "0f 01 cf \tencls ",}, {{0x0f, 0x01, 0xd7, }, 3, 0, "", "", diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-src.c b/tools/perf/arch/x86/tests/insn-x86-dat-src.c index c3808e94c46e0..7906f7b2ffeb9 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-src.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-src.c @@ -1910,6 +1910,27 @@ int main(void) asm volatile("notrack bnd jmpq *0x12345678(%rax,%rcx,8)"); /* Expecting: jmp indirect 0 */ asm volatile("notrack bnd jmpq *0x12345678(%r8,%rcx,8)"); /* Expecting: jmp indirect 0 */ + /* AMX */ + + asm volatile("ldtilecfg (%rax,%rcx,8)"); + asm volatile("ldtilecfg (%r8,%rcx,8)"); + asm volatile("sttilecfg (%rax,%rcx,8)"); + asm volatile("sttilecfg (%r8,%rcx,8)"); + asm volatile("tdpbf16ps %tmm0, %tmm1, %tmm2"); + asm volatile("tdpbssd %tmm0, %tmm1, %tmm2"); + asm volatile("tdpbsud %tmm0, %tmm1, %tmm2"); + asm volatile("tdpbusd %tmm0, %tmm1, %tmm2"); + asm volatile("tdpbuud %tmm0, %tmm1, %tmm2"); + asm volatile("tileloadd (%rax,%rcx,8), %tmm1"); + asm volatile("tileloadd (%r8,%rcx,8), %tmm2"); + asm volatile("tileloaddt1 (%rax,%rcx,8), %tmm1"); + asm volatile("tileloaddt1 (%r8,%rcx,8), %tmm2"); + asm volatile("tilerelease"); + asm volatile("tilestored %tmm1, (%rax,%rcx,8)"); + asm volatile("tilestored %tmm2, (%r8,%rcx,8)"); + asm volatile("tilezero %tmm0"); + asm volatile("tilezero %tmm7"); + #else /* #ifdef __x86_64__ */ /* bound r32, mem (same op code as EVEX prefix) */ -- GitLab From 9dd94df75b30eca03ed2151dd5bbc152a6f19abf Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 2 Dec 2021 11:50:25 +0200 Subject: [PATCH 0004/1586] x86/insn: Add AMX instructions to the x86 instruction decoder The x86 instruction decoder is used for both kernel instructions and user space 
instructions (e.g. uprobes, perf tools Intel PT), so it is good to update it with new instructions. Add AMX instructions to the x86 instruction decoder. Reference: Intel Architecture Instruction Set Extensions and Future Features Programming Reference May 2021 Document Number: 319433-044 Example using perf tools' x86 instruction decoder test: $ INSN='ldtilecfg\|sttilecfg\|tdpbf16ps\|tdpbssd\|' $ INSN+='tdpbsud\|tdpbusd\|tdpbuud\|tileloadd\|' $ INSN+='tileloaddt1\|tilerelease\|tilestored\|tilezero' $ perf test -v "x86 instruction decoder" |& grep -i $INSN Decoded ok: c4 e2 78 49 04 c8 ldtilecfg (%rax,%rcx,8) Decoded ok: c4 c2 78 49 04 c8 ldtilecfg (%r8,%rcx,8) Decoded ok: c4 e2 79 49 04 c8 sttilecfg (%rax,%rcx,8) Decoded ok: c4 c2 79 49 04 c8 sttilecfg (%r8,%rcx,8) Decoded ok: c4 e2 7a 5c d1 tdpbf16ps %tmm0,%tmm1,%tmm2 Decoded ok: c4 e2 7b 5e d1 tdpbssd %tmm0,%tmm1,%tmm2 Decoded ok: c4 e2 7a 5e d1 tdpbsud %tmm0,%tmm1,%tmm2 Decoded ok: c4 e2 79 5e d1 tdpbusd %tmm0,%tmm1,%tmm2 Decoded ok: c4 e2 78 5e d1 tdpbuud %tmm0,%tmm1,%tmm2 Decoded ok: c4 e2 7b 4b 0c c8 tileloadd (%rax,%rcx,8),%tmm1 Decoded ok: c4 c2 7b 4b 14 c8 tileloadd (%r8,%rcx,8),%tmm2 Decoded ok: c4 e2 79 4b 0c c8 tileloaddt1 (%rax,%rcx,8),%tmm1 Decoded ok: c4 c2 79 4b 14 c8 tileloaddt1 (%r8,%rcx,8),%tmm2 Decoded ok: c4 e2 78 49 c0 tilerelease Decoded ok: c4 e2 7a 4b 0c c8 tilestored %tmm1,(%rax,%rcx,8) Decoded ok: c4 c2 7a 4b 14 c8 tilestored %tmm2,(%r8,%rcx,8) Decoded ok: c4 e2 7b 49 c0 tilezero %tmm0 Decoded ok: c4 e2 7b 49 f8 tilezero %tmm7 Signed-off-by: Adrian Hunter Signed-off-by: Borislav Petkov Acked-by: Arnaldo Carvalho de Melo Acked-by: Masami Hiramatsu Link: https://lore.kernel.org/r/20211202095029.2165714-3-adrian.hunter@intel.com --- arch/x86/lib/x86-opcode-map.txt | 10 ++++++++-- tools/arch/x86/lib/x86-opcode-map.txt | 10 ++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 
ec31f5b60323d..b2cc6c04cbfe7 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -690,7 +690,10 @@ AVXcode: 2 45: vpsrlvd/q Vx,Hx,Wx (66),(v) 46: vpsravd Vx,Hx,Wx (66),(v) | vpsravd/q Vx,Hx,Wx (66),(evo) 47: vpsllvd/q Vx,Hx,Wx (66),(v) -# Skip 0x48-0x4b +# Skip 0x48 +49: TILERELEASE (v1),(000),(11B) | LDTILECFG Mtc (v1)(000) | STTILECFG Mtc (66),(v1),(000) | TILEZERO Vt (F2),(v1),(11B) +# Skip 0x4a +4b: TILELOADD Vt,Wsm (F2),(v1) | TILELOADDT1 Vt,Wsm (66),(v1) | TILESTORED Wsm,Vt (F3),(v) 4c: vrcp14ps/d Vpd,Wpd (66),(ev) 4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev) 4e: vrsqrt14ps/d Vpd,Wpd (66),(ev) @@ -705,7 +708,10 @@ AVXcode: 2 59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo) 5a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo) 5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev) -# Skip 0x5c-0x61 +5c: TDPBF16PS Vt,Wt,Ht (F3),(v1) +# Skip 0x5d +5e: TDPBSSD Vt,Wt,Ht (F2),(v1) | TDPBSUD Vt,Wt,Ht (F3),(v1) | TDPBUSD Vt,Wt,Ht (66),(v1) | TDPBUUD Vt,Wt,Ht (v1) +# Skip 0x5f-0x61 62: vpexpandb/w Vx,Wx (66),(ev) 63: vpcompressb/w Wx,Vx (66),(ev) 64: vpblendmd/q Vx,Hx,Wx (66),(ev) diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt index ec31f5b60323d..b2cc6c04cbfe7 100644 --- a/tools/arch/x86/lib/x86-opcode-map.txt +++ b/tools/arch/x86/lib/x86-opcode-map.txt @@ -690,7 +690,10 @@ AVXcode: 2 45: vpsrlvd/q Vx,Hx,Wx (66),(v) 46: vpsravd Vx,Hx,Wx (66),(v) | vpsravd/q Vx,Hx,Wx (66),(evo) 47: vpsllvd/q Vx,Hx,Wx (66),(v) -# Skip 0x48-0x4b +# Skip 0x48 +49: TILERELEASE (v1),(000),(11B) | LDTILECFG Mtc (v1)(000) | STTILECFG Mtc (66),(v1),(000) | TILEZERO Vt (F2),(v1),(11B) +# Skip 0x4a +4b: TILELOADD Vt,Wsm (F2),(v1) | TILELOADDT1 Vt,Wsm (66),(v1) | TILESTORED Wsm,Vt (F3),(v) 4c: vrcp14ps/d Vpd,Wpd (66),(ev) 4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev) 4e: vrsqrt14ps/d Vpd,Wpd (66),(ev) @@ -705,7 +708,10 @@ AVXcode: 2 59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo) 5a: 
vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo) 5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev) -# Skip 0x5c-0x61 +5c: TDPBF16PS Vt,Wt,Ht (F3),(v1) +# Skip 0x5d +5e: TDPBSSD Vt,Wt,Ht (F2),(v1) | TDPBSUD Vt,Wt,Ht (F3),(v1) | TDPBUSD Vt,Wt,Ht (66),(v1) | TDPBUUD Vt,Wt,Ht (v1) +# Skip 0x5f-0x61 62: vpexpandb/w Vx,Wx (66),(ev) 63: vpcompressb/w Wx,Vx (66),(ev) 64: vpblendmd/q Vx,Hx,Wx (66),(ev) -- GitLab From a6ea1142dee66f054a7ce51ebd053ef5ad976227 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 2 Dec 2021 11:50:26 +0200 Subject: [PATCH 0005/1586] perf/tests: Add misc instructions to the x86 instruction decoder test The x86 instruction decoder is used for both kernel instructions and user space instructions (e.g. uprobes, perf tools Intel PT), so it is good to update it with new instructions. Add the following instructions to the x86 instruction decoder test: User Interrupt clui senduipi stui testui uiret Prediction history reset hreset Serialize instruction execution serialize TSX suspend load address tracking xresldtrk xsusldtrk A subsequent patch adds the instructions to the instruction decoder. 
Reference: Intel Architecture Instruction Set Extensions and Future Features Programming Reference May 2021 Document Number: 319433-044 Example: $ perf test -v "x86 instruction decoder" |& grep -i hreset Failed to decode length (4 vs expected 6): f3 0f 3a f0 c0 00 hreset $0x0 Failed to decode length (4 vs expected 6): f3 0f 3a f0 c0 00 hreset $0x0 Signed-off-by: Adrian Hunter Signed-off-by: Borislav Petkov Acked-by: Arnaldo Carvalho de Melo Acked-by: Masami Hiramatsu Link: https://lore.kernel.org/r/20211202095029.2165714-4-adrian.hunter@intel.com --- tools/perf/arch/x86/tests/insn-x86-dat-32.c | 8 +++++++ tools/perf/arch/x86/tests/insn-x86-dat-64.c | 20 ++++++++++++++++++ tools/perf/arch/x86/tests/insn-x86-dat-src.c | 22 ++++++++++++++++++++ 3 files changed, 50 insertions(+) diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-32.c b/tools/perf/arch/x86/tests/insn-x86-dat-32.c index 9708ae8920616..79e2050cd1c26 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-32.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-32.c @@ -2197,6 +2197,14 @@ "3e f2 ff 25 78 56 34 12 \tnotrack bnd jmp *0x12345678",}, {{0x3e, 0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect", "3e f2 ff a4 c8 78 56 34 12 \tnotrack bnd jmp *0x12345678(%eax,%ecx,8)",}, +{{0xf3, 0x0f, 0x3a, 0xf0, 0xc0, 0x00, }, 6, 0, "", "", +"f3 0f 3a f0 c0 00 \threset $0x0",}, +{{0x0f, 0x01, 0xe8, }, 3, 0, "", "", +"0f 01 e8 \tserialize ",}, +{{0xf2, 0x0f, 0x01, 0xe9, }, 4, 0, "", "", +"f2 0f 01 e9 \txresldtrk ",}, +{{0xf2, 0x0f, 0x01, 0xe8, }, 4, 0, "", "", +"f2 0f 01 e8 \txsusldtrk ",}, {{0x0f, 0x01, 0xcf, }, 3, 0, "", "", "0f 01 cf \tencls ",}, {{0x0f, 0x01, 0xd7, }, 3, 0, "", "", diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-64.c b/tools/perf/arch/x86/tests/insn-x86-dat-64.c index 3548565a1cc55..b2d0ba45262b3 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-64.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-64.c @@ -2495,6 +2495,26 @@ "c4 e2 7b 49 c0 \ttilezero %tmm0",}, {{0xc4, 0xe2, 
0x7b, 0x49, 0xf8, }, 5, 0, "", "", "c4 e2 7b 49 f8 \ttilezero %tmm7",}, +{{0xf3, 0x0f, 0x01, 0xee, }, 4, 0, "", "", +"f3 0f 01 ee \tclui ",}, +{{0xf3, 0x0f, 0xc7, 0xf0, }, 4, 0, "", "", +"f3 0f c7 f0 \tsenduipi %rax",}, +{{0xf3, 0x41, 0x0f, 0xc7, 0xf0, }, 5, 0, "", "", +"f3 41 0f c7 f0 \tsenduipi %r8",}, +{{0xf3, 0x0f, 0x01, 0xef, }, 4, 0, "", "", +"f3 0f 01 ef \tstui ",}, +{{0xf3, 0x0f, 0x01, 0xed, }, 4, 0, "", "", +"f3 0f 01 ed \ttestui ",}, +{{0xf3, 0x0f, 0x01, 0xec, }, 4, 0, "", "", +"f3 0f 01 ec \tuiret ",}, +{{0xf3, 0x0f, 0x3a, 0xf0, 0xc0, 0x00, }, 6, 0, "", "", +"f3 0f 3a f0 c0 00 \threset $0x0",}, +{{0x0f, 0x01, 0xe8, }, 3, 0, "", "", +"0f 01 e8 \tserialize ",}, +{{0xf2, 0x0f, 0x01, 0xe9, }, 4, 0, "", "", +"f2 0f 01 e9 \txresldtrk ",}, +{{0xf2, 0x0f, 0x01, 0xe8, }, 4, 0, "", "", +"f2 0f 01 e8 \txsusldtrk ",}, {{0x0f, 0x01, 0xcf, }, 3, 0, "", "", "0f 01 cf \tencls ",}, {{0x0f, 0x01, 0xd7, }, 3, 0, "", "", diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-src.c b/tools/perf/arch/x86/tests/insn-x86-dat-src.c index 7906f7b2ffeb9..425db6a1b580a 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-src.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-src.c @@ -1931,6 +1931,15 @@ int main(void) asm volatile("tilezero %tmm0"); asm volatile("tilezero %tmm7"); + /* User Interrupt */ + + asm volatile("clui"); + asm volatile("senduipi %rax"); + asm volatile("senduipi %r8"); + asm volatile("stui"); + asm volatile("testui"); + asm volatile("uiret"); + #else /* #ifdef __x86_64__ */ /* bound r32, mem (same op code as EVEX prefix) */ @@ -3693,6 +3702,19 @@ int main(void) #endif /* #ifndef __x86_64__ */ + /* Prediction history reset */ + + asm volatile("hreset $0"); + + /* Serialize instruction execution */ + + asm volatile("serialize"); + + /* TSX suspend load address tracking */ + + asm volatile("xresldtrk"); + asm volatile("xsusldtrk"); + /* SGX */ asm volatile("encls"); -- GitLab From 0153d98f2dd6d5161fc4d496d785c10686d0d7b6 Mon Sep 17 00:00:00 2001 From: Adrian Hunter 
Date: Thu, 2 Dec 2021 11:50:27 +0200 Subject: [PATCH 0006/1586] x86/insn: Add misc instructions to x86 instruction decoder x86 instruction decoder is used for both kernel instructions and user space instructions (e.g. uprobes, perf tools Intel PT), so it is good to update it with new instructions. Add instructions to x86 instruction decoder: User Interrupt clui senduipi stui testui uiret Prediction history reset hreset Serialize instruction execution serialize TSX suspend load address tracking xresldtrk xsusldtrk Reference: Intel Architecture Instruction Set Extensions and Future Features Programming Reference May 2021 Document Number: 319433-044 Example using perf tools' x86 instruction decoder test: $ perf test -v "x86 instruction decoder" |& grep -i hreset Decoded ok: f3 0f 3a f0 c0 00 hreset $0x0 Decoded ok: f3 0f 3a f0 c0 00 hreset $0x0 Signed-off-by: Adrian Hunter Signed-off-by: Borislav Petkov Acked-by: Arnaldo Carvalho de Melo Acked-by: Masami Hiramatsu Link: https://lore.kernel.org/r/20211202095029.2165714-5-adrian.hunter@intel.com --- arch/x86/lib/x86-opcode-map.txt | 6 +++--- tools/arch/x86/lib/x86-opcode-map.txt | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index b2cc6c04cbfe7..591797a931bfc 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -893,7 +893,7 @@ cc: sha1rnds4 Vdq,Wdq,Ib ce: vgf2p8affineqb Vx,Wx,Ib (66) cf: vgf2p8affineinvqb Vx,Wx,Ib (66) df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1) -f0: RORX Gy,Ey,Ib (F2),(v) +f0: RORX Gy,Ey,Ib (F2),(v) | HRESET Gv,Ib (F3),(000),(11B) EndTable GrpTable: Grp1 @@ -976,7 +976,7 @@ GrpTable: Grp7 2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B) 3: LIDT Ms 4: SMSW Mw/Rv -5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B) +5: rdpkru (110),(11B) | 
wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B) | CLUI (F3),(110),(11B) | SERIALIZE (000),(11B) | STUI (F3),(111),(11B) | TESTUI (F3)(101)(11B) | UIRET (F3),(100),(11B) | XRESLDTRK (F2),(000),(11B) | XSUSLDTRK (F2),(001),(11B) 6: LMSW Ew 7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) EndTable @@ -993,7 +993,7 @@ GrpTable: Grp9 3: xrstors 4: xsavec 5: xsaves -6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) +6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) | SENDUIPI Gq (F3) 7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B) EndTable diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt index b2cc6c04cbfe7..591797a931bfc 100644 --- a/tools/arch/x86/lib/x86-opcode-map.txt +++ b/tools/arch/x86/lib/x86-opcode-map.txt @@ -893,7 +893,7 @@ cc: sha1rnds4 Vdq,Wdq,Ib ce: vgf2p8affineqb Vx,Wx,Ib (66) cf: vgf2p8affineinvqb Vx,Wx,Ib (66) df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1) -f0: RORX Gy,Ey,Ib (F2),(v) +f0: RORX Gy,Ey,Ib (F2),(v) | HRESET Gv,Ib (F3),(000),(11B) EndTable GrpTable: Grp1 @@ -976,7 +976,7 @@ GrpTable: Grp7 2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B) 3: LIDT Ms 4: SMSW Mw/Rv -5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B) +5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B) | CLUI (F3),(110),(11B) | SERIALIZE (000),(11B) | STUI (F3),(111),(11B) | TESTUI (F3)(101)(11B) | UIRET (F3),(100),(11B) | XRESLDTRK (F2),(000),(11B) | XSUSLDTRK (F2),(001),(11B) 6: LMSW Ew 7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) EndTable @@ -993,7 +993,7 @@ GrpTable: Grp9 3: xrstors 4: xsavec 5: xsaves -6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) +6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | 
RDRAND Rv (11B) | SENDUIPI Gq (F3) 7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B) EndTable -- GitLab From cdb63ba98c5d03774bca9789e689fe62be4347b4 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 2 Dec 2021 11:50:28 +0200 Subject: [PATCH 0007/1586] perf/tests: Add AVX512-FP16 instructions to x86 instruction decoder test The x86 instruction decoder is used for both kernel instructions and user space instructions (e.g. uprobes, perf tools Intel PT), so it is good to update it with new instructions. Add AVX512-FP16 instructions to x86 instruction decoder test. A subsequent patch adds the instructions to the instruction decoder. Reference: Intel AVX512-FP16 Architecture Specification June 2021 Revision 1.0 Document Number: 347407-001US Example: $ perf test -v "x86 instruction decoder" |& grep vfcmaddcph | head -2 Failed to decode: 62 f6 6f 48 56 cb vfcmaddcph %zmm3,%zmm2,%zmm1 Failed to decode: 62 f6 6f 48 56 8c c8 78 56 34 12 vfcmaddcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1 Signed-off-by: Adrian Hunter Signed-off-by: Borislav Petkov Acked-by: Arnaldo Carvalho de Melo Acked-by: Masami Hiramatsu Link: https://lore.kernel.org/r/20211202095029.2165714-6-adrian.hunter@intel.com --- tools/perf/arch/x86/tests/insn-x86-dat-32.c | 910 ++++++++++++ tools/perf/arch/x86/tests/insn-x86-dat-64.c | 1370 ++++++++++++++++++ tools/perf/arch/x86/tests/insn-x86-dat-src.c | 1146 +++++++++++++++ 3 files changed, 3426 insertions(+) diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-32.c b/tools/perf/arch/x86/tests/insn-x86-dat-32.c index 79e2050cd1c26..ba429cadb18f3 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-32.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-32.c @@ -2197,6 +2197,916 @@ "3e f2 ff 25 78 56 34 12 \tnotrack bnd jmp *0x12345678",}, {{0x3e, 0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect", "3e f2 ff a4 c8 78 56 34 12 \tnotrack bnd jmp *0x12345678(%eax,%ecx,8)",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x58, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 58 
cb \tvaddph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x58, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 58 cb \tvaddph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x58, 0xcb, }, 6, 0, "", "", +"62 f5 6c 28 58 cb \tvaddph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x58, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 58 cb \tvaddsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 58 8c c8 78 56 34 12 \tvaddsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x6c, 0x48, 0xc2, 0xeb, 0x12, }, 7, 0, "", "", +"62 f3 6c 48 c2 eb 12 \tvcmple_oqph %zmm3,%zmm2,%k5",}, +{{0x62, 0xf3, 0x6c, 0x48, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6c 48 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%zmm2,%k5",}, +{{0x62, 0xf3, 0x6c, 0x08, 0xc2, 0xeb, 0x12, }, 7, 0, "", "", +"62 f3 6c 08 c2 eb 12 \tvcmple_oqph %xmm3,%xmm2,%k5",}, +{{0x62, 0xf3, 0x6c, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6c 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%xmm2,%k5",}, +{{0x62, 0xf3, 0x6c, 0x28, 0xc2, 0xeb, 0x12, }, 7, 0, "", "", +"62 f3 6c 28 c2 eb 12 \tvcmple_oqph %ymm3,%ymm2,%k5",}, +{{0x62, 0xf3, 0x6c, 0x28, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6c 28 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%ymm2,%k5",}, +{{0x62, 0xf3, 0x6e, 0x08, 0xc2, 0xeb, 0x12, }, 7, 0, 
"", "", +"62 f3 6e 08 c2 eb 12 \tvcmple_oqsh %xmm3,%xmm2,%k5",}, +{{0x62, 0xf3, 0x6e, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6e 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqsh 0x12345678(%eax,%ecx,8),%xmm2,%k5",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x2f, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 2f ca \tvcomish %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x2f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 2f 8c c8 78 56 34 12 \tvcomish 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 5b ca \tvcvtdq2ph %zmm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 5b 8c c8 78 56 34 12 \tvcvtdq2ph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 5b ca \tvcvtdq2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 5b ca \tvcvtdq2ph %ymm2,%xmm1",}, +{{0x62, 0xf5, 0xfd, 0x48, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 fd 48 5a ca \tvcvtpd2ph %zmm2,%xmm1",}, +{{0x62, 0xf5, 0xfd, 0x08, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 fd 08 5a ca \tvcvtpd2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0xfd, 0x28, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 fd 28 5a ca \tvcvtpd2ph %ymm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 5b ca \tvcvtph2dq %ymm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 5b ca \tvcvtph2dq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 5b ca \tvcvtph2dq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x5b, 0x8c, 
0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 5a ca \tvcvtph2pd %xmm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 5a ca \tvcvtph2pd %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 5a ca \tvcvtph2pd %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x13, 0xca, }, 6, 0, "", "", +"62 f2 7d 48 13 ca \tvcvtph2ps %ymm2,%zmm1",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 7d 48 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0xc4, 0xe2, 0x79, 0x13, 0xca, }, 5, 0, "", "", +"c4 e2 79 13 ca \tvcvtph2ps %xmm2,%xmm1",}, +{{0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"c4 e2 79 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0xc4, 0xe2, 0x7d, 0x13, 0xca, }, 5, 0, "", "", +"c4 e2 7d 13 ca \tvcvtph2ps %xmm2,%ymm1",}, +{{0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0xc4, 0xe2, 0x79, 0x13, 0xca, }, 5, 0, "", "", +"c4 e2 79 13 ca \tvcvtph2ps %xmm2,%xmm1",}, +{{0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"c4 e2 79 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0xc4, 0xe2, 0x7d, 
0x13, 0xca, }, 5, 0, "", "", +"c4 e2 7d 13 ca \tvcvtph2ps %xmm2,%ymm1",}, +{{0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x13, 0xca, }, 6, 0, "", "", +"62 f6 7d 48 13 ca \tvcvtph2psx %ymm2,%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 48 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x13, 0xca, }, 6, 0, "", "", +"62 f6 7d 08 13 ca \tvcvtph2psx %xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 08 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x13, 0xca, }, 6, 0, "", "", +"62 f6 7d 28 13 ca \tvcvtph2psx %xmm2,%ymm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 28 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7b, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 7b ca \tvcvtph2qq %xmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7b, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 7b ca \tvcvtph2qq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7b, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 7b ca \tvcvtph2qq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 79 ca \tvcvtph2udq %ymm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 
0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 79 ca \tvcvtph2udq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 79 ca \tvcvtph2udq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 79 ca \tvcvtph2uqq %xmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 79 ca \tvcvtph2uqq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 79 ca \tvcvtph2uqq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 7d ca \tvcvtph2uw %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 7d ca \tvcvtph2uw %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 7d 
8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 7d ca \tvcvtph2uw %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 7d ca \tvcvtph2w %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 7d ca \tvcvtph2w %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 7d ca \tvcvtph2w %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf3, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7d 48 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%zmm1,0x12345678(%eax,%ecx,8)",}, +{{0x62, 0xf3, 0x7d, 0x48, 0x1d, 0xd1, 0x12, }, 7, 0, "", "", +"62 f3 7d 48 1d d1 12 \tvcvtps2ph $0x12,%zmm2,%ymm1",}, +{{0xc4, 0xe3, 0x7d, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "", +"c4 e3 7d 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm1,0x12345678(%eax,%ecx,8)",}, +{{0xc4, 0xe3, 0x79, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "", +"c4 e3 79 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm1,0x12345678(%eax,%ecx,8)",}, +{{0xc4, 0xe3, 0x79, 0x1d, 0xd1, 0x12, }, 6, 0, "", "", +"c4 e3 79 1d d1 12 \tvcvtps2ph $0x12,%xmm2,%xmm1",}, +{{0xc4, 0xe3, 0x7d, 0x1d, 0xd1, 0x12, }, 6, 0, "", "", +"c4 e3 7d 1d d1 12 \tvcvtps2ph 
$0x12,%ymm2,%xmm1",}, +{{0xc4, 0xe3, 0x7d, 0x1d, 0xd1, 0x12, }, 6, 0, "", "", +"c4 e3 7d 1d d1 12 \tvcvtps2ph $0x12,%ymm2,%xmm1",}, +{{0xc4, 0xe3, 0x7d, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "", +"c4 e3 7d 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm2,0x12345678(%eax,%ecx,8)",}, +{{0xc4, 0xe3, 0x79, 0x1d, 0xd1, 0x12, }, 6, 0, "", "", +"c4 e3 79 1d d1 12 \tvcvtps2ph $0x12,%xmm2,%xmm1",}, +{{0xc4, 0xe3, 0x79, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "", +"c4 e3 79 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm2,0x12345678(%eax,%ecx,8)",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x1d, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 1d ca \tvcvtps2phx %zmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 1d 8c c8 78 56 34 12 \tvcvtps2phx 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x1d, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 1d ca \tvcvtps2phx %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x1d, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 1d ca \tvcvtps2phx %ymm2,%xmm1",}, +{{0x62, 0xf5, 0xfc, 0x48, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 fc 48 5b ca \tvcvtqq2ph %zmm2,%xmm1",}, +{{0x62, 0xf5, 0xfc, 0x08, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 fc 08 5b ca \tvcvtqq2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0xfc, 0x28, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 fc 28 5b ca \tvcvtqq2ph %ymm2,%xmm1",}, +{{0x62, 0xf5, 0xef, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 ef 08 5a 8c c8 78 56 34 12 \tvcvtsd2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 5a 8c c8 78 56 34 12 \tvcvtsh2sd 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x2d, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 2d 84 c8 78 56 34 12 \tvcvtsh2si 0x12345678(%eax,%ecx,8),%eax",}, +{{0x62, 0xf6, 0x6c, 0x08, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6c 
08 13 8c c8 78 56 34 12 \tvcvtsh2ss 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x79, 0xc1, }, 6, 0, "", "", +"62 f5 7e 08 79 c1 \tvcvtsh2usi %xmm1,%eax",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x79, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 79 84 c8 78 56 34 12 \tvcvtsh2usi 0x12345678(%eax,%ecx,8),%eax",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0xc8, }, 6, 0, "", "", +"62 f5 6e 08 2a c8 \tvcvtsi2sh %eax,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x1d, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 1d cb \tvcvtss2sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 1d 8c c8 78 56 34 12 \tvcvtss2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x48, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7e 48 5b ca \tvcvttph2dq %ymm2,%zmm1",}, +{{0x62, 0xf5, 0x7e, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 48 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7e 08 5b ca \tvcvttph2dq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x28, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7e 28 5b ca \tvcvttph2dq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7e, 0x28, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 28 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 7a ca \tvcvttph2qq 
%xmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 7a ca \tvcvttph2qq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 7a ca \tvcvttph2qq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 78 ca \tvcvttph2udq %ymm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 78 ca \tvcvttph2udq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 78 ca \tvcvttph2udq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 78 ca \tvcvttph2uqq %xmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 78 ca \tvcvttph2uqq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 
0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 78 ca \tvcvttph2uqq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x7c, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 7c ca \tvcvttph2uw %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x7c, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 7c ca \tvcvttph2uw %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x7c, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 7c ca \tvcvttph2uw %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7c, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 7c ca \tvcvttph2w %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7c, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 7c ca \tvcvttph2w %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7c, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 7c ca \tvcvttph2w %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 7c 8c c8 78 56 34 12 \tvcvttph2w 
0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x2c, 0xc1, }, 6, 0, "", "", +"62 f5 7e 08 2c c1 \tvcvttsh2si %xmm1,%eax",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x2c, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 2c 84 c8 78 56 34 12 \tvcvttsh2si 0x12345678(%eax,%ecx,8),%eax",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x78, 0xc1, }, 6, 0, "", "", +"62 f5 7e 08 78 c1 \tvcvttsh2usi %xmm1,%eax",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x78, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 78 84 c8 78 56 34 12 \tvcvttsh2usi 0x12345678(%eax,%ecx,8),%eax",}, +{{0x62, 0xf5, 0x7f, 0x48, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7f 48 7a ca \tvcvtudq2ph %zmm2,%ymm1",}, +{{0x62, 0xf5, 0x7f, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7f 48 7a 8c c8 78 56 34 12 \tvcvtudq2ph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7f, 0x08, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7f 08 7a ca \tvcvtudq2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7f, 0x28, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7f 28 7a ca \tvcvtudq2ph %ymm2,%xmm1",}, +{{0x62, 0xf5, 0xff, 0x48, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 ff 48 7a ca \tvcvtuqq2ph %zmm2,%xmm1",}, +{{0x62, 0xf5, 0xff, 0x08, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 ff 08 7a ca \tvcvtuqq2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0xff, 0x28, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 ff 28 7a ca \tvcvtuqq2ph %ymm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0xc8, }, 6, 0, "", "", +"62 f5 6e 08 7b c8 \tvcvtusi2sh %eax,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7f, 0x48, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7f 48 7d ca \tvcvtuw2ph %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7f, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 
0x34, 0x12, }, 11, 0, "", "", +"62 f5 7f 48 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7f, 0x08, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7f 08 7d ca \tvcvtuw2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7f, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7f 08 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7f, 0x28, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7f 28 7d ca \tvcvtuw2ph %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7f, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7f 28 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7e, 0x48, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7e 48 7d ca \tvcvtw2ph %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7e, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 48 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7e 08 7d ca \tvcvtw2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x28, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7e 28 7d ca \tvcvtw2ph %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7e, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 28 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5e, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 5e cb \tvdivph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5e, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 5e cb \tvdivph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 5e 8c c8 78 56 34 12 \tvdivph 
0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5e, 0xcb, }, 6, 0, "", "", +"62 f5 6c 28 5e cb \tvdivph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5e, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 5e cb \tvdivsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 5e 8c c8 78 56 34 12 \tvdivsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x48, 0x56, 0xcb, }, 6, 0, "", "", +"62 f6 6f 48 56 cb \tvfcmaddcph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6f, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 48 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0x56, 0xcb, }, 6, 0, "", "", +"62 f6 6f 08 56 cb \tvfcmaddcph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 08 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x28, 0x56, 0xcb, }, 6, 0, "", "", +"62 f6 6f 28 56 cb \tvfcmaddcph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6f, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 28 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0x57, 0xcb, }, 6, 0, "", "", +"62 f6 6f 08 57 cb \tvfcmaddcsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 08 57 8c c8 78 56 34 12 \tvfcmaddcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x48, 0xd6, 0xcb, }, 6, 0, "", "", +"62 f6 6f 48 d6 cb \tvfcmulcph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6f, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 48 d6 8c c8 78 56 34 12 \tvfcmulcph 
0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0xd6, 0xcb, }, 6, 0, "", "", +"62 f6 6f 08 d6 cb \tvfcmulcph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 08 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x28, 0xd6, 0xcb, }, 6, 0, "", "", +"62 f6 6f 28 d6 cb \tvfcmulcph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6f, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 28 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0xd7, 0xcb, }, 6, 0, "", "", +"62 f6 6f 08 d7 cb \tvfcmulcsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 08 d7 8c c8 78 56 34 12 \tvfcmulcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x98, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 98 cb \tvfmadd132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x98, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 98 cb \tvfmadd132ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x98, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 98 cb \tvfmadd132ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x99, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 99 cb \tvfmadd132sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x99, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 99 8c c8 78 56 34 12 
\tvfmadd132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 a8 cb \tvfmadd213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 a8 cb \tvfmadd213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 a8 cb \tvfmadd213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa9, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 a9 cb \tvfmadd213sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 a9 8c c8 78 56 34 12 \tvfmadd213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 b8 cb \tvfmadd231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 b8 cb \tvfmadd231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 b8 cb \tvfmadd231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 b8 
8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb9, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 b9 cb \tvfmadd231sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 b9 8c c8 78 56 34 12 \tvfmadd231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x48, 0x56, 0xcb, }, 6, 0, "", "", +"62 f6 6e 48 56 cb \tvfmaddcph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6e, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 48 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0x56, 0xcb, }, 6, 0, "", "", +"62 f6 6e 08 56 cb \tvfmaddcph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 08 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x28, 0x56, 0xcb, }, 6, 0, "", "", +"62 f6 6e 28 56 cb \tvfmaddcph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6e, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 28 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0x57, 0xcb, }, 6, 0, "", "", +"62 f6 6e 08 57 cb \tvfmaddcsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 08 57 8c c8 78 56 34 12 \tvfmaddcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x96, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 96 cb \tvfmaddsub132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x96, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 96 cb \tvfmaddsub132ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 
6d 08 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x96, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 96 cb \tvfmaddsub132ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 a6 cb \tvfmaddsub213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 a6 cb \tvfmaddsub213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 a6 cb \tvfmaddsub213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 b6 cb \tvfmaddsub231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 b6 cb \tvfmaddsub231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 b6 cb \tvfmaddsub231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb6, 
0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9a, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 9a cb \tvfmsub132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9a, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9a cb \tvfmsub132ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x9a, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 9a cb \tvfmsub132ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9b, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9b cb \tvfmsub132sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9b 8c c8 78 56 34 12 \tvfmsub132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xaa, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 aa cb \tvfmsub213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xaa, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 aa cb \tvfmsub213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xaa, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 aa cb \tvfmsub213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 
0xf6, 0x6d, 0x28, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xab, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 ab cb \tvfmsub213sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xab, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 ab 8c c8 78 56 34 12 \tvfmsub213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xba, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 ba cb \tvfmsub231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xba, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 ba cb \tvfmsub231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xba, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 ba cb \tvfmsub231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbb, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 bb cb \tvfmsub231sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbb, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 bb 8c c8 78 56 34 12 \tvfmsub231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x97, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 97 cb \tvfmsubadd132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x97, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 97 cb \tvfmsubadd132ph 
%xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x97, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 97 cb \tvfmsubadd132ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa7, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 a7 cb \tvfmsubadd213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa7, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 a7 cb \tvfmsubadd213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa7, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 a7 cb \tvfmsubadd213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb7, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 b7 cb \tvfmsubadd231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb7, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 b7 cb \tvfmsubadd231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb7, 
0xcb, }, 6, 0, "", "", +"62 f6 6d 28 b7 cb \tvfmsubadd231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6e, 0x48, 0xd6, 0xcb, }, 6, 0, "", "", +"62 f6 6e 48 d6 cb \tvfmulcph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6e, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 48 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0xd6, 0xcb, }, 6, 0, "", "", +"62 f6 6e 08 d6 cb \tvfmulcph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 08 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x28, 0xd6, 0xcb, }, 6, 0, "", "", +"62 f6 6e 28 d6 cb \tvfmulcph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6e, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 28 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0xd7, 0xcb, }, 6, 0, "", "", +"62 f6 6e 08 d7 cb \tvfmulcsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 08 d7 8c c8 78 56 34 12 \tvfmulcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 9c cb \tvfnmadd132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9c cb \tvfnmadd132ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 
0x9c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 9c cb \tvfnmadd132ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9d, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9d cb \tvfnmadd132sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9d 8c c8 78 56 34 12 \tvfnmadd132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xac, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 ac cb \tvfnmadd213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xac, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 ac cb \tvfnmadd213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xac, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 ac cb \tvfnmadd213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xad, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 ad cb \tvfnmadd213sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xad, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 ad 8c c8 78 56 34 12 \tvfnmadd213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xbc, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 bc cb \tvfnmadd231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 bc 8c c8 78 56 34 12 \tvfnmadd231ph 
0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbc, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 bc cb \tvfnmadd231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xbc, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 bc cb \tvfnmadd231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbd, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 bd cb \tvfnmadd231sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbd, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 bd 8c c8 78 56 34 12 \tvfnmadd231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9e, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 9e cb \tvfnmsub132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9e, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9e cb \tvfnmsub132ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x9e, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 9e cb \tvfnmsub132ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9f, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9f cb \tvfnmsub132sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9f 8c 
c8 78 56 34 12 \tvfnmsub132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xae, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 ae cb \tvfnmsub213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xae, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 ae cb \tvfnmsub213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xae, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 ae cb \tvfnmsub213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xaf, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 af cb \tvfnmsub213sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xaf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 af 8c c8 78 56 34 12 \tvfnmsub213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xbe, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 be cb \tvfnmsub231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbe, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 be cb \tvfnmsub231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xbe, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 be cb \tvfnmsub231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 
0, "", "", +"62 f6 6d 28 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbf, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 bf cb \tvfnmsub231sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 bf 8c c8 78 56 34 12 \tvfnmsub231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x66, 0xe9, 0x12, }, 7, 0, "", "", +"62 f3 7c 48 66 e9 12 \tvfpclassph $0x12,%zmm1,%k5",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x66, 0xe9, 0x12, }, 7, 0, "", "", +"62 f3 7c 08 66 e9 12 \tvfpclassph $0x12,%xmm1,%k5",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x66, 0xe9, 0x12, }, 7, 0, "", "", +"62 f3 7c 28 66 e9 12 \tvfpclassph $0x12,%ymm1,%k5",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x67, 0xe9, 0x12, }, 7, 0, "", "", +"62 f3 7c 08 67 e9 12 \tvfpclasssh $0x12,%xmm1,%k5",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x67, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 08 67 ac c8 78 56 34 12 12 \tvfpclasssh $0x12,0x12345678(%eax,%ecx,8),%k5",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x42, 0xca, }, 6, 0, "", "", +"62 f6 7d 48 42 ca \tvgetexpph %zmm2,%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 48 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x42, 0xca, }, 6, 0, "", "", +"62 f6 7d 08 42 ca \tvgetexpph %xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 08 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x42, 0xca, }, 6, 0, "", "", +"62 f6 7d 28 42 ca \tvgetexpph %ymm2,%ymm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 28 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x43, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 43 cb \tvgetexpsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 
0x43, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 43 8c c8 78 56 34 12 \tvgetexpsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x26, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 48 26 ca 12 \tvgetmantph $0x12,%zmm2,%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 48 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x26, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 08 26 ca 12 \tvgetmantph $0x12,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 08 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x26, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 28 26 ca 12 \tvgetmantph $0x12,%ymm2,%ymm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 28 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x27, 0xcb, 0x12, }, 7, 0, "", "", +"62 f3 6c 08 27 cb 12 \tvgetmantsh $0x12,%xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x27, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6c 08 27 8c c8 78 56 34 12 12 \tvgetmantsh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5f, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 5f cb \tvmaxph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5f, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 5f cb \tvmaxph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5f, 0xcb, }, 6, 0, "", "", +"62 f5 6c 
28 5f cb \tvmaxph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5f, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 5f cb \tvmaxsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 5f 8c c8 78 56 34 12 \tvmaxsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5d, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 5d cb \tvminph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5d, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 5d cb \tvminph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5d, 0xcb, }, 6, 0, "", "", +"62 f5 6c 28 5d cb \tvminph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5d, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 5d cb \tvminsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 5d 8c c8 78 56 34 12 \tvminsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x11, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 11 8c c8 78 56 34 12 \tvmovsh %xmm1,0x12345678(%eax,%ecx,8)",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x10, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 10 8c c8 78 56 34 12 \tvmovsh 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x10, 0xcb, }, 6, 0, "", "", +"62 f5 6e 
08 10 cb \tvmovsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7e, 0xc8, }, 6, 0, "", "", +"62 f5 7d 08 7e c8 \tvmovw %xmm1,%eax",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 7e 8c c8 78 56 34 12 \tvmovw %xmm1,0x12345678(%eax,%ecx,8)",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x6e, 0xc8, }, 6, 0, "", "", +"62 f5 7d 08 6e c8 \tvmovw %eax,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x6e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 6e 8c c8 78 56 34 12 \tvmovw 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x59, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 59 cb \tvmulph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x59, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 59 cb \tvmulph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x59, 0xcb, }, 6, 0, "", "", +"62 f5 6c 28 59 cb \tvmulph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x59, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 59 cb \tvmulsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 59 8c c8 78 56 34 12 \tvmulsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x4c, 0xca, }, 6, 0, "", "", +"62 f6 7d 48 4c ca \tvrcpph %zmm2,%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 48 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x4c, 0xca, }, 6, 0, "", "", 
+"62 f6 7d 08 4c ca \tvrcpph %xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 08 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x4c, 0xca, }, 6, 0, "", "", +"62 f6 7d 28 4c ca \tvrcpph %ymm2,%ymm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 28 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x4d, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 4d cb \tvrcpsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x4d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 4d 8c c8 78 56 34 12 \tvrcpsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x56, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 48 56 ca 12 \tvreduceph $0x12,%zmm2,%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 48 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x56, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 08 56 ca 12 \tvreduceph $0x12,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 08 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x56, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 28 56 ca 12 \tvreduceph $0x12,%ymm2,%ymm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 28 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x57, 0xcb, 0x12, }, 7, 0, "", "", +"62 f3 6c 08 57 cb 12 \tvreducesh $0x12,%xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6c 08 57 8c c8 78 56 34 12 12 \tvreducesh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 
0x48, 0x08, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 48 08 ca 12 \tvrndscaleph $0x12,%zmm2,%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 48 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x08, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 08 08 ca 12 \tvrndscaleph $0x12,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 08 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x08, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 28 08 ca 12 \tvrndscaleph $0x12,%ymm2,%ymm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 28 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x0a, 0xcb, 0x12, }, 7, 0, "", "", +"62 f3 6c 08 0a cb 12 \tvrndscalesh $0x12,%xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x0a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6c 08 0a 8c c8 78 56 34 12 12 \tvrndscalesh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x4e, 0xca, }, 6, 0, "", "", +"62 f6 7d 48 4e ca \tvrsqrtph %zmm2,%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 48 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x4e, 0xca, }, 6, 0, "", "", +"62 f6 7d 08 4e ca \tvrsqrtph %xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 08 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x4e, 0xca, }, 6, 0, "", "", +"62 f6 7d 28 4e ca \tvrsqrtph %ymm2,%ymm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 28 4e 8c c8 78 56 34 12 \tvrsqrtph 
0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x4f, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 4f cb \tvrsqrtsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x4f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 4f 8c c8 78 56 34 12 \tvrsqrtsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x2c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 2c cb \tvscalefph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x2c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 2c cb \tvscalefph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x2c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 2c cb \tvscalefph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x2d, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 2d cb \tvscalefsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x2d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 2d 8c c8 78 56 34 12 \tvscalefsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x51, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 51 ca \tvsqrtph %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x51, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 51 ca \tvsqrtph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 
0xf5, 0x7c, 0x28, 0x51, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 51 ca \tvsqrtph %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x51, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 51 cb \tvsqrtsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 51 8c c8 78 56 34 12 \tvsqrtsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5c, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 5c cb \tvsubph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5c, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 5c cb \tvsubph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5c, 0xcb, }, 6, 0, "", "", +"62 f5 6c 28 5c cb \tvsubph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5c, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 5c cb \tvsubsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 5c 8c c8 78 56 34 12 \tvsubsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x2e, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 2e ca \tvucomish %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x2e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 2e 8c c8 78 56 34 12 \tvucomish 0x12345678(%eax,%ecx,8),%xmm1",}, {{0xf3, 0x0f, 0x3a, 0xf0, 0xc0, 0x00, }, 6, 0, "", "", "f3 0f 3a f0 c0 00 
\threset $0x0",}, {{0x0f, 0x01, 0xe8, }, 3, 0, "", "", diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-64.c b/tools/perf/arch/x86/tests/insn-x86-dat-64.c index b2d0ba45262b3..3a47e98fec331 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-64.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-64.c @@ -2507,6 +2507,1376 @@ "f3 0f 01 ed \ttestui ",}, {{0xf3, 0x0f, 0x01, 0xec, }, 4, 0, "", "", "f3 0f 01 ec \tuiret ",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x58, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 58 cb \tvaddph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 48 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x58, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 58 cb \tvaddph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 08 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x58, 0xcb, }, 6, 0, "", "", +"62 f5 6c 28 58 cb \tvaddph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 28 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x58, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 58 cb \tvaddsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 
08 58 8c c8 78 56 34 12 \tvaddsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 58 8c c8 78 56 34 12 \tvaddsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x6c, 0x48, 0xc2, 0xeb, 0x12, }, 7, 0, "", "", +"62 f3 6c 48 c2 eb 12 \tvcmple_oqph %zmm3,%zmm2,%k5",}, +{{0x62, 0xf3, 0x6c, 0x48, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6c 48 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%rax,%rcx,8),%zmm2,%k5",}, +{{0x67, 0x62, 0xf3, 0x6c, 0x48, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 6c 48 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%zmm2,%k5",}, +{{0x62, 0xf3, 0x6c, 0x08, 0xc2, 0xeb, 0x12, }, 7, 0, "", "", +"62 f3 6c 08 c2 eb 12 \tvcmple_oqph %xmm3,%xmm2,%k5",}, +{{0x62, 0xf3, 0x6c, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6c 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%rax,%rcx,8),%xmm2,%k5",}, +{{0x67, 0x62, 0xf3, 0x6c, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 6c 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%xmm2,%k5",}, +{{0x62, 0xf3, 0x6c, 0x28, 0xc2, 0xeb, 0x12, }, 7, 0, "", "", +"62 f3 6c 28 c2 eb 12 \tvcmple_oqph %ymm3,%ymm2,%k5",}, +{{0x62, 0xf3, 0x6c, 0x28, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6c 28 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%rax,%rcx,8),%ymm2,%k5",}, +{{0x67, 0x62, 0xf3, 0x6c, 0x28, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 6c 28 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%ymm2,%k5",}, +{{0x62, 0xf3, 0x6e, 0x08, 0xc2, 0xeb, 0x12, }, 7, 0, "", "", +"62 f3 6e 08 c2 eb 12 \tvcmple_oqsh %xmm3,%xmm2,%k5",}, +{{0x62, 0xf3, 0x6e, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6e 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqsh 
0x12345678(%rax,%rcx,8),%xmm2,%k5",}, +{{0x67, 0x62, 0xf3, 0x6e, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 6e 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqsh 0x12345678(%eax,%ecx,8),%xmm2,%k5",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x2f, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 2f ca \tvcomish %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x2f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 2f 8c c8 78 56 34 12 \tvcomish 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x2f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 08 2f 8c c8 78 56 34 12 \tvcomish 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 5b ca \tvcvtdq2ph %zmm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 5b 8c c8 78 56 34 12 \tvcvtdq2ph 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 48 5b 8c c8 78 56 34 12 \tvcvtdq2ph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 5b ca \tvcvtdq2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 5b ca \tvcvtdq2ph %ymm2,%xmm1",}, +{{0x62, 0xf5, 0xfd, 0x48, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 fd 48 5a ca \tvcvtpd2ph %zmm2,%xmm1",}, +{{0x62, 0xf5, 0xfd, 0x08, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 fd 08 5a ca \tvcvtpd2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0xfd, 0x28, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 fd 28 5a ca \tvcvtpd2ph %ymm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 5b ca \tvcvtph2dq %ymm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, 
"", "", +"67 62 f5 7d 48 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 5b ca \tvcvtph2dq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 08 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 5b ca \tvcvtph2dq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 28 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 5a ca \tvcvtph2pd %xmm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 48 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 5a ca \tvcvtph2pd %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 08 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 5a ca \tvcvtph2pd %xmm2,%ymm1",}, 
+{{0x62, 0xf5, 0x7c, 0x28, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 28 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x13, 0xca, }, 6, 0, "", "", +"62 f2 7d 48 13 ca \tvcvtph2ps %ymm2,%zmm1",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 7d 48 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf2, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 7d 48 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0xc4, 0xe2, 0x79, 0x13, 0xca, }, 5, 0, "", "", +"c4 e2 79 13 ca \tvcvtph2ps %xmm2,%xmm1",}, +{{0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"c4 e2 79 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"67 c4 e2 79 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0xc4, 0xe2, 0x7d, 0x13, 0xca, }, 5, 0, "", "", +"c4 e2 7d 13 ca \tvcvtph2ps %xmm2,%ymm1",}, +{{0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"67 c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0xc4, 0xe2, 0x79, 0x13, 0xca, }, 5, 0, "", "", +"c4 e2 79 13 ca \tvcvtph2ps %xmm2,%xmm1",}, +{{0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"c4 e2 79 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"67 c4 e2 79 13 8c 
c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0xc4, 0xe2, 0x7d, 0x13, 0xca, }, 5, 0, "", "", +"c4 e2 7d 13 ca \tvcvtph2ps %xmm2,%ymm1",}, +{{0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"67 c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x13, 0xca, }, 6, 0, "", "", +"62 f6 7d 48 13 ca \tvcvtph2psx %ymm2,%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 48 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 48 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x13, 0xca, }, 6, 0, "", "", +"62 f6 7d 08 13 ca \tvcvtph2psx %xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 08 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x08, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 08 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x13, 0xca, }, 6, 0, "", "", +"62 f6 7d 28 13 ca \tvcvtph2psx %xmm2,%ymm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 28 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x28, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 28 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7b, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 7b ca \tvcvtph2qq %xmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 
0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 48 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7b, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 7b ca \tvcvtph2qq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 08 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7b, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 7b ca \tvcvtph2qq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 28 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 79 ca \tvcvtph2udq %ymm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 48 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 79 ca \tvcvtph2udq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, 
"", "", +"67 62 f5 7c 08 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 79 ca \tvcvtph2udq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 28 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 79 ca \tvcvtph2uqq %xmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 48 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 79 ca \tvcvtph2uqq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 08 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 79 ca \tvcvtph2uqq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 28 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 7d ca \tvcvtph2uw 
%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 48 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 7d ca \tvcvtph2uw %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 08 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 7d ca \tvcvtph2uw %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 28 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 7d ca \tvcvtph2w %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 48 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 7d ca \tvcvtph2w %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 
0x7d, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 08 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 7d ca \tvcvtph2w %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 28 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf3, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7d 48 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%zmm1,0x12345678(%rax,%rcx,8)",}, +{{0x67, 0x62, 0xf3, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 7d 48 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%zmm1,0x12345678(%eax,%ecx,8)",}, +{{0x62, 0xf3, 0x7d, 0x48, 0x1d, 0xd1, 0x12, }, 7, 0, "", "", +"62 f3 7d 48 1d d1 12 \tvcvtps2ph $0x12,%zmm2,%ymm1",}, +{{0xc4, 0xe3, 0x7d, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "", +"c4 e3 7d 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm1,0x12345678(%rax,%rcx,8)",}, +{{0x67, 0xc4, 0xe3, 0x7d, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"67 c4 e3 7d 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm1,0x12345678(%eax,%ecx,8)",}, +{{0xc4, 0xe3, 0x79, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "", +"c4 e3 79 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm1,0x12345678(%rax,%rcx,8)",}, +{{0x67, 0xc4, 0xe3, 0x79, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"67 c4 e3 79 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm1,0x12345678(%eax,%ecx,8)",}, +{{0xc4, 0xe3, 0x79, 0x1d, 0xd1, 0x12, }, 6, 0, "", "", +"c4 e3 79 1d d1 12 \tvcvtps2ph $0x12,%xmm2,%xmm1",}, +{{0xc4, 0xe3, 0x7d, 0x1d, 0xd1, 0x12, }, 6, 0, "", "", +"c4 e3 7d 1d d1 12 
\tvcvtps2ph $0x12,%ymm2,%xmm1",}, +{{0xc4, 0xe3, 0x7d, 0x1d, 0xd1, 0x12, }, 6, 0, "", "", +"c4 e3 7d 1d d1 12 \tvcvtps2ph $0x12,%ymm2,%xmm1",}, +{{0xc4, 0xe3, 0x7d, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "", +"c4 e3 7d 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm2,0x12345678(%rax,%rcx,8)",}, +{{0x67, 0xc4, 0xe3, 0x7d, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"67 c4 e3 7d 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm2,0x12345678(%eax,%ecx,8)",}, +{{0xc4, 0xe3, 0x79, 0x1d, 0xd1, 0x12, }, 6, 0, "", "", +"c4 e3 79 1d d1 12 \tvcvtps2ph $0x12,%xmm2,%xmm1",}, +{{0xc4, 0xe3, 0x79, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "", +"c4 e3 79 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm2,0x12345678(%rax,%rcx,8)",}, +{{0x67, 0xc4, 0xe3, 0x79, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"67 c4 e3 79 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm2,0x12345678(%eax,%ecx,8)",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x1d, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 1d ca \tvcvtps2phx %zmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 1d 8c c8 78 56 34 12 \tvcvtps2phx 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 48 1d 8c c8 78 56 34 12 \tvcvtps2phx 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x1d, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 1d ca \tvcvtps2phx %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x1d, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 1d ca \tvcvtps2phx %ymm2,%xmm1",}, +{{0x62, 0xf5, 0xfc, 0x48, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 fc 48 5b ca \tvcvtqq2ph %zmm2,%xmm1",}, +{{0x62, 0xf5, 0xfc, 0x08, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 fc 08 5b ca \tvcvtqq2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0xfc, 0x28, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 fc 28 5b ca \tvcvtqq2ph %ymm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0xef, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 
0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 ef 08 5a 8c c8 78 56 34 12 \tvcvtsd2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 5a 8c c8 78 56 34 12 \tvcvtsh2sd 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x2d, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 08 2d 84 c8 78 56 34 12 \tvcvtsh2si 0x12345678(%eax,%ecx,8),%eax",}, +{{0x67, 0x62, 0xf5, 0xfe, 0x08, 0x2d, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 fe 08 2d 84 c8 78 56 34 12 \tvcvtsh2si 0x12345678(%eax,%ecx,8),%rax",}, +{{0x67, 0x62, 0xf6, 0x6c, 0x08, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6c 08 13 8c c8 78 56 34 12 \tvcvtsh2ss 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x79, 0xc1, }, 6, 0, "", "", +"62 f5 7e 08 79 c1 \tvcvtsh2usi %xmm1,%eax",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x79, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 79 84 c8 78 56 34 12 \tvcvtsh2usi 0x12345678(%rax,%rcx,8),%eax",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x79, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 08 79 84 c8 78 56 34 12 \tvcvtsh2usi 0x12345678(%eax,%ecx,8),%eax",}, +{{0x62, 0xf5, 0xfe, 0x08, 0x79, 0xc1, }, 6, 0, "", "", +"62 f5 fe 08 79 c1 \tvcvtsh2usi %xmm1,%rax",}, +{{0x62, 0xf5, 0xfe, 0x08, 0x79, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 fe 08 79 84 c8 78 56 34 12 \tvcvtsh2usi 0x12345678(%rax,%rcx,8),%rax",}, +{{0x67, 0x62, 0xf5, 0xfe, 0x08, 0x79, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 fe 08 79 84 c8 78 56 34 12 \tvcvtsh2usi 0x12345678(%eax,%ecx,8),%rax",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0xc8, }, 6, 0, "", "", +"62 f5 6e 08 2a c8 \tvcvtsi2sh %eax,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2shl 
0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2shl 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0xee, 0x08, 0x2a, 0xc8, }, 6, 0, "", "", +"62 f5 ee 08 2a c8 \tvcvtsi2sh %rax,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2shl 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2shl 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x1d, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 1d cb \tvcvtss2sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 1d 8c c8 78 56 34 12 \tvcvtss2sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 08 1d 8c c8 78 56 34 12 \tvcvtss2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x48, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7e 48 5b ca \tvcvttph2dq %ymm2,%zmm1",}, +{{0x62, 0xf5, 0x7e, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 48 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 48 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7e 08 5b ca \tvcvttph2dq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 08 5b 
8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x28, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7e 28 5b ca \tvcvttph2dq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7e, 0x28, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 28 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x28, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 28 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 7a ca \tvcvttph2qq %xmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 48 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 7a ca \tvcvttph2qq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 08 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 7a ca \tvcvttph2qq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 28 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 78 ca \tvcvttph2udq %ymm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 
0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 48 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 78 ca \tvcvttph2udq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 08 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 78 ca \tvcvttph2udq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 28 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 78 ca \tvcvttph2uqq %xmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 48 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 78 ca \tvcvttph2uqq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 
0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 08 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 78 ca \tvcvttph2uqq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 28 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x7c, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 7c ca \tvcvttph2uw %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 48 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x7c, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 7c ca \tvcvttph2uw %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 08 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x7c, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 7c ca \tvcvttph2uw %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 28 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7c, 
0xca, }, 6, 0, "", "", +"62 f5 7d 48 7c ca \tvcvttph2w %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 48 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7c, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 7c ca \tvcvttph2w %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 08 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7c, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 7c ca \tvcvttph2w %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 28 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x2c, 0xc1, }, 6, 0, "", "", +"62 f5 7e 08 2c c1 \tvcvttsh2si %xmm1,%eax",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x2c, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 2c 84 c8 78 56 34 12 \tvcvttsh2si 0x12345678(%rax,%rcx,8),%eax",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x2c, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 08 2c 84 c8 78 56 34 12 \tvcvttsh2si 0x12345678(%eax,%ecx,8),%eax",}, +{{0x62, 0xf5, 0xfe, 0x08, 0x2c, 0xc1, }, 6, 0, "", "", +"62 f5 fe 08 2c c1 \tvcvttsh2si %xmm1,%rax",}, +{{0x62, 0xf5, 0xfe, 0x08, 0x2c, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 fe 08 2c 84 c8 78 56 34 12 
\tvcvttsh2si 0x12345678(%rax,%rcx,8),%rax",}, +{{0x67, 0x62, 0xf5, 0xfe, 0x08, 0x2c, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 fe 08 2c 84 c8 78 56 34 12 \tvcvttsh2si 0x12345678(%eax,%ecx,8),%rax",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x78, 0xc1, }, 6, 0, "", "", +"62 f5 7e 08 78 c1 \tvcvttsh2usi %xmm1,%eax",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x78, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 78 84 c8 78 56 34 12 \tvcvttsh2usi 0x12345678(%rax,%rcx,8),%eax",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x78, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 08 78 84 c8 78 56 34 12 \tvcvttsh2usi 0x12345678(%eax,%ecx,8),%eax",}, +{{0x62, 0xf5, 0xfe, 0x08, 0x78, 0xc1, }, 6, 0, "", "", +"62 f5 fe 08 78 c1 \tvcvttsh2usi %xmm1,%rax",}, +{{0x62, 0xf5, 0xfe, 0x08, 0x78, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 fe 08 78 84 c8 78 56 34 12 \tvcvttsh2usi 0x12345678(%rax,%rcx,8),%rax",}, +{{0x67, 0x62, 0xf5, 0xfe, 0x08, 0x78, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 fe 08 78 84 c8 78 56 34 12 \tvcvttsh2usi 0x12345678(%eax,%ecx,8),%rax",}, +{{0x62, 0xf5, 0x7f, 0x48, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7f 48 7a ca \tvcvtudq2ph %zmm2,%ymm1",}, +{{0x62, 0xf5, 0x7f, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7f 48 7a 8c c8 78 56 34 12 \tvcvtudq2ph 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7f, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7f 48 7a 8c c8 78 56 34 12 \tvcvtudq2ph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7f, 0x08, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7f 08 7a ca \tvcvtudq2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7f, 0x28, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7f 28 7a ca \tvcvtudq2ph %ymm2,%xmm1",}, +{{0x62, 0xf5, 0xff, 0x48, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 ff 48 7a ca \tvcvtuqq2ph %zmm2,%xmm1",}, +{{0x62, 0xf5, 0xff, 0x08, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 ff 08 7a ca \tvcvtuqq2ph %xmm2,%xmm1",}, 
+{{0x62, 0xf5, 0xff, 0x28, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 ff 28 7a ca \tvcvtuqq2ph %ymm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0xc8, }, 6, 0, "", "", +"62 f5 6e 08 7b c8 \tvcvtusi2sh %eax,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2shl 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2shl 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0xee, 0x08, 0x7b, 0xc8, }, 6, 0, "", "", +"62 f5 ee 08 7b c8 \tvcvtusi2sh %rax,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2shl 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2shl 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7f, 0x48, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7f 48 7d ca \tvcvtuw2ph %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7f, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7f 48 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7f, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7f 48 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7f, 0x08, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7f 08 7d ca \tvcvtuw2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7f, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7f 08 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7f, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7f 08 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7f, 0x28, 0x7d, 0xca, }, 6, 0, "", 
"", +"62 f5 7f 28 7d ca \tvcvtuw2ph %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7f, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7f 28 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7f, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7f 28 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7e, 0x48, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7e 48 7d ca \tvcvtw2ph %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7e, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 48 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 48 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7e 08 7d ca \tvcvtw2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 08 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x28, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7e 28 7d ca \tvcvtw2ph %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7e, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 28 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 28 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5e, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 5e cb \tvdivph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 5e 8c c8 78 56 34 12 \tvdivph 
0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 48 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5e, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 5e cb \tvdivph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 08 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5e, 0xcb, }, 6, 0, "", "", +"62 f5 6c 28 5e cb \tvdivph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 28 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5e, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 5e cb \tvdivsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 5e 8c c8 78 56 34 12 \tvdivsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 5e 8c c8 78 56 34 12 \tvdivsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x48, 0x56, 0xcb, }, 6, 0, "", "", +"62 f6 6f 48 56 cb \tvfcmaddcph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6f, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 48 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6f, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6f 48 56 8c c8 
78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0x56, 0xcb, }, 6, 0, "", "", +"62 f6 6f 08 56 cb \tvfcmaddcph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 08 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6f, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6f 08 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x28, 0x56, 0xcb, }, 6, 0, "", "", +"62 f6 6f 28 56 cb \tvfcmaddcph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6f, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 28 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6f, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6f 28 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0x57, 0xcb, }, 6, 0, "", "", +"62 f6 6f 08 57 cb \tvfcmaddcsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 08 57 8c c8 78 56 34 12 \tvfcmaddcsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6f, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6f 08 57 8c c8 78 56 34 12 \tvfcmaddcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x48, 0xd6, 0xcb, }, 6, 0, "", "", +"62 f6 6f 48 d6 cb \tvfcmulcph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6f, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 48 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6f, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6f 48 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0xd6, 0xcb, }, 6, 0, "", "", 
+"62 f6 6f 08 d6 cb \tvfcmulcph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 08 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6f, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6f 08 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x28, 0xd6, 0xcb, }, 6, 0, "", "", +"62 f6 6f 28 d6 cb \tvfcmulcph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6f, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 28 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6f, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6f 28 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0xd7, 0xcb, }, 6, 0, "", "", +"62 f6 6f 08 d7 cb \tvfcmulcsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 08 d7 8c c8 78 56 34 12 \tvfcmulcsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6f, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6f 08 d7 8c c8 78 56 34 12 \tvfcmulcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x98, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 98 cb \tvfmadd132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x98, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 98 cb \tvfmadd132ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 
}, 11, 0, "", "", +"62 f6 6d 08 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x98, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 98 cb \tvfmadd132ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x99, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 99 cb \tvfmadd132sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x99, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 99 8c c8 78 56 34 12 \tvfmadd132sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x99, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 99 8c c8 78 56 34 12 \tvfmadd132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 a8 cb \tvfmadd213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 a8 cb \tvfmadd213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, 
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 a8 cb \tvfmadd213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa9, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 a9 cb \tvfmadd213sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 a9 8c c8 78 56 34 12 \tvfmadd213sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xa9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 a9 8c c8 78 56 34 12 \tvfmadd213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 b8 cb \tvfmadd231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 b8 cb \tvfmadd231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 
08 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 b8 cb \tvfmadd231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb9, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 b9 cb \tvfmadd231sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 b9 8c c8 78 56 34 12 \tvfmadd231sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xb9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 b9 8c c8 78 56 34 12 \tvfmadd231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x48, 0x56, 0xcb, }, 6, 0, "", "", +"62 f6 6e 48 56 cb \tvfmaddcph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6e, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 48 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6e, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6e 48 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0x56, 0xcb, }, 6, 0, "", "", +"62 f6 6e 08 56 cb \tvfmaddcph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 08 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6e, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6e 08 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x28, 0x56, 0xcb, }, 
6, 0, "", "", +"62 f6 6e 28 56 cb \tvfmaddcph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6e, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 28 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6e, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6e 28 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0x57, 0xcb, }, 6, 0, "", "", +"62 f6 6e 08 57 cb \tvfmaddcsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 08 57 8c c8 78 56 34 12 \tvfmaddcsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6e, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6e 08 57 8c c8 78 56 34 12 \tvfmaddcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x96, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 96 cb \tvfmaddsub132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x96, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 96 cb \tvfmaddsub132ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x96, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 96 cb \tvfmaddsub132ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 
0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 a6 cb \tvfmaddsub213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 a6 cb \tvfmaddsub213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 a6 cb \tvfmaddsub213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 b6 cb \tvfmaddsub231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 
48 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 b6 cb \tvfmaddsub231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 b6 cb \tvfmaddsub231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9a, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 9a cb \tvfmsub132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9a, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9a cb \tvfmsub132ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, 
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x9a, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 9a cb \tvfmsub132ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9b, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9b cb \tvfmsub132sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9b 8c c8 78 56 34 12 \tvfmsub132sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 9b 8c c8 78 56 34 12 \tvfmsub132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xaa, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 aa cb \tvfmsub213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xaa, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 aa cb \tvfmsub213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 
08 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xaa, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 aa cb \tvfmsub213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xab, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 ab cb \tvfmsub213sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xab, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 ab 8c c8 78 56 34 12 \tvfmsub213sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xab, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 ab 8c c8 78 56 34 12 \tvfmsub213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xba, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 ba cb \tvfmsub231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xba, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 ba cb \tvfmsub231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 
0xba, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 ba cb \tvfmsub231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbb, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 bb cb \tvfmsub231sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbb, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 bb 8c c8 78 56 34 12 \tvfmsub231sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xbb, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 bb 8c c8 78 56 34 12 \tvfmsub231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x97, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 97 cb \tvfmsubadd132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x97, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 97 cb \tvfmsubadd132ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x97, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 97 cb \tvfmsubadd132ph %ymm3,%ymm2,%ymm1",}, 
+{{0x62, 0xf6, 0x6d, 0x28, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa7, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 a7 cb \tvfmsubadd213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa7, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 a7 cb \tvfmsubadd213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa7, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 a7 cb \tvfmsubadd213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb7, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 b7 cb \tvfmsubadd231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 
11, 0, "", "", +"62 f6 6d 48 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb7, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 b7 cb \tvfmsubadd231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb7, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 b7 cb \tvfmsubadd231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6e, 0x48, 0xd6, 0xcb, }, 6, 0, "", "", +"62 f6 6e 48 d6 cb \tvfmulcph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6e, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 48 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6e, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6e 48 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0xd6, 0xcb, }, 6, 0, "", "", +"62 f6 6e 08 d6 cb \tvfmulcph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 08 d6 8c c8 78 56 34 12 \tvfmulcph 
0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6e, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6e 08 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x28, 0xd6, 0xcb, }, 6, 0, "", "", +"62 f6 6e 28 d6 cb \tvfmulcph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6e, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 28 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6e, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6e 28 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0xd7, 0xcb, }, 6, 0, "", "", +"62 f6 6e 08 d7 cb \tvfmulcsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 08 d7 8c c8 78 56 34 12 \tvfmulcsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6e, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6e 08 d7 8c c8 78 56 34 12 \tvfmulcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 9c cb \tvfnmadd132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9c cb \tvfnmadd132ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 
0, "", "", +"67 62 f6 6d 08 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x9c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 9c cb \tvfnmadd132ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9d, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9d cb \tvfnmadd132sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9d 8c c8 78 56 34 12 \tvfnmadd132sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 9d 8c c8 78 56 34 12 \tvfnmadd132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xac, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 ac cb \tvfnmadd213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xac, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 ac cb \tvfnmadd213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 ac 8c c8 78 56 34 12 \tvfnmadd213ph 
0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xac, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 ac cb \tvfnmadd213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xad, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 ad cb \tvfnmadd213sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xad, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 ad 8c c8 78 56 34 12 \tvfnmadd213sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xad, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 ad 8c c8 78 56 34 12 \tvfnmadd213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xbc, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 bc cb \tvfnmadd231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbc, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 bc cb \tvfnmadd231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xbc, 0xcb, }, 6, 0, "", "", 
+"62 f6 6d 28 bc cb \tvfnmadd231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbd, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 bd cb \tvfnmadd231sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbd, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 bd 8c c8 78 56 34 12 \tvfnmadd231sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xbd, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 bd 8c c8 78 56 34 12 \tvfnmadd231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9e, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 9e cb \tvfnmsub132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9e, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9e cb \tvfnmsub132ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x9e, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 9e cb \tvfnmsub132ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x9e, 0x8c, 
0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9f, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9f cb \tvfnmsub132sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9f 8c c8 78 56 34 12 \tvfnmsub132sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 9f 8c c8 78 56 34 12 \tvfnmsub132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xae, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 ae cb \tvfnmsub213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xae, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 ae cb \tvfnmsub213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xae, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 ae cb \tvfnmsub213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 ae 8c c8 78 56 34 12 
\tvfnmsub213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xaf, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 af cb \tvfnmsub213sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xaf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 af 8c c8 78 56 34 12 \tvfnmsub213sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xaf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 af 8c c8 78 56 34 12 \tvfnmsub213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xbe, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 be cb \tvfnmsub231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbe, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 be cb \tvfnmsub231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xbe, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 be cb \tvfnmsub231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xbe, 
0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbf, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 bf cb \tvfnmsub231sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 bf 8c c8 78 56 34 12 \tvfnmsub231sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xbf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 bf 8c c8 78 56 34 12 \tvfnmsub231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x66, 0xe9, 0x12, }, 7, 0, "", "", +"62 f3 7c 48 66 e9 12 \tvfpclassph $0x12,%zmm1,%k5",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x66, 0xe9, 0x12, }, 7, 0, "", "", +"62 f3 7c 08 66 e9 12 \tvfpclassph $0x12,%xmm1,%k5",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x66, 0xe9, 0x12, }, 7, 0, "", "", +"62 f3 7c 28 66 e9 12 \tvfpclassph $0x12,%ymm1,%k5",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x67, 0xe9, 0x12, }, 7, 0, "", "", +"62 f3 7c 08 67 e9 12 \tvfpclasssh $0x12,%xmm1,%k5",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x67, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 08 67 ac c8 78 56 34 12 12 \tvfpclasssh $0x12,0x12345678(%rax,%rcx,8),%k5",}, +{{0x67, 0x62, 0xf3, 0x7c, 0x08, 0x67, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 7c 08 67 ac c8 78 56 34 12 12 \tvfpclasssh $0x12,0x12345678(%eax,%ecx,8),%k5",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x42, 0xca, }, 6, 0, "", "", +"62 f6 7d 48 42 ca \tvgetexpph %zmm2,%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 48 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x48, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 48 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x42, 0xca, }, 6, 0, "", "", +"62 f6 7d 08 42 ca 
\tvgetexpph %xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 08 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x08, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 08 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x42, 0xca, }, 6, 0, "", "", +"62 f6 7d 28 42 ca \tvgetexpph %ymm2,%ymm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 28 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x28, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 28 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x43, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 43 cb \tvgetexpsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x43, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 43 8c c8 78 56 34 12 \tvgetexpsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x43, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 43 8c c8 78 56 34 12 \tvgetexpsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x26, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 48 26 ca 12 \tvgetmantph $0x12,%zmm2,%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 48 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf3, 0x7c, 0x48, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 7c 48 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x26, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 08 26 ca 12 \tvgetmantph $0x12,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", 
+"62 f3 7c 08 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf3, 0x7c, 0x08, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 7c 08 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x26, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 28 26 ca 12 \tvgetmantph $0x12,%ymm2,%ymm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 28 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf3, 0x7c, 0x28, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 7c 28 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x27, 0xcb, 0x12, }, 7, 0, "", "", +"62 f3 6c 08 27 cb 12 \tvgetmantsh $0x12,%xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x27, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6c 08 27 8c c8 78 56 34 12 12 \tvgetmantsh $0x12,0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf3, 0x6c, 0x08, 0x27, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 6c 08 27 8c c8 78 56 34 12 12 \tvgetmantsh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5f, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 5f cb \tvmaxph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 48 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5f, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 5f cb \tvmaxph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 5f 8c c8 78 56 34 12 \tvmaxph 
0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 08 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5f, 0xcb, }, 6, 0, "", "", +"62 f5 6c 28 5f cb \tvmaxph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 28 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5f, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 5f cb \tvmaxsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 5f 8c c8 78 56 34 12 \tvmaxsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 5f 8c c8 78 56 34 12 \tvmaxsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5d, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 5d cb \tvminph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 48 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5d, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 5d cb \tvminph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 08 5d 8c c8 78 56 34 
12 \tvminph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5d, 0xcb, }, 6, 0, "", "", +"62 f5 6c 28 5d cb \tvminph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 28 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5d, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 5d cb \tvminsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 5d 8c c8 78 56 34 12 \tvminsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 5d 8c c8 78 56 34 12 \tvminsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x11, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 11 8c c8 78 56 34 12 \tvmovsh %xmm1,0x12345678(%rax,%rcx,8)",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x11, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 08 11 8c c8 78 56 34 12 \tvmovsh %xmm1,0x12345678(%eax,%ecx,8)",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x10, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 10 8c c8 78 56 34 12 \tvmovsh 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x10, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 08 10 8c c8 78 56 34 12 \tvmovsh 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x10, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 10 cb \tvmovsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7e, 0xc8, }, 6, 0, "", "", +"62 f5 7d 08 7e c8 \tvmovw %xmm1,%eax",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 7e 8c c8 78 56 34 12 \tvmovw 
%xmm1,0x12345678(%rax,%rcx,8)",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x7e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 08 7e 8c c8 78 56 34 12 \tvmovw %xmm1,0x12345678(%eax,%ecx,8)",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x6e, 0xc8, }, 6, 0, "", "", +"62 f5 7d 08 6e c8 \tvmovw %eax,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x6e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 6e 8c c8 78 56 34 12 \tvmovw 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x6e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 08 6e 8c c8 78 56 34 12 \tvmovw 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x59, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 59 cb \tvmulph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 48 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x59, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 59 cb \tvmulph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 08 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x59, 0xcb, }, 6, 0, "", "", +"62 f5 6c 28 59 cb \tvmulph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 28 59 8c c8 78 56 34 12 \tvmulph 
0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x59, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 59 cb \tvmulsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 59 8c c8 78 56 34 12 \tvmulsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 59 8c c8 78 56 34 12 \tvmulsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x4c, 0xca, }, 6, 0, "", "", +"62 f6 7d 48 4c ca \tvrcpph %zmm2,%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 48 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x48, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 48 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x4c, 0xca, }, 6, 0, "", "", +"62 f6 7d 08 4c ca \tvrcpph %xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 08 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x08, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 08 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x4c, 0xca, }, 6, 0, "", "", +"62 f6 7d 28 4c ca \tvrcpph %ymm2,%ymm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 28 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x28, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 28 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x4d, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 4d cb \tvrcpsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x4d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 
11, 0, "", "", +"62 f6 6d 08 4d 8c c8 78 56 34 12 \tvrcpsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x4d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 4d 8c c8 78 56 34 12 \tvrcpsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x56, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 48 56 ca 12 \tvreduceph $0x12,%zmm2,%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 48 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf3, 0x7c, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 7c 48 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x56, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 08 56 ca 12 \tvreduceph $0x12,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 08 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf3, 0x7c, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 7c 08 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x56, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 28 56 ca 12 \tvreduceph $0x12,%ymm2,%ymm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 28 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf3, 0x7c, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 7c 28 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x57, 0xcb, 0x12, }, 7, 0, "", "", +"62 f3 6c 08 57 cb 12 \tvreducesh $0x12,%xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6c 08 57 8c 
c8 78 56 34 12 12 \tvreducesh $0x12,0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf3, 0x6c, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 6c 08 57 8c c8 78 56 34 12 12 \tvreducesh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x08, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 48 08 ca 12 \tvrndscaleph $0x12,%zmm2,%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 48 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf3, 0x7c, 0x48, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 7c 48 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x08, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 08 08 ca 12 \tvrndscaleph $0x12,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 08 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf3, 0x7c, 0x08, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 7c 08 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x08, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 28 08 ca 12 \tvrndscaleph $0x12,%ymm2,%ymm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 28 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf3, 0x7c, 0x28, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 7c 28 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x0a, 0xcb, 0x12, }, 7, 0, "", "", +"62 f3 6c 08 0a cb 12 \tvrndscalesh $0x12,%xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x0a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 
f3 6c 08 0a 8c c8 78 56 34 12 12 \tvrndscalesh $0x12,0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf3, 0x6c, 0x08, 0x0a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 6c 08 0a 8c c8 78 56 34 12 12 \tvrndscalesh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x4e, 0xca, }, 6, 0, "", "", +"62 f6 7d 48 4e ca \tvrsqrtph %zmm2,%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 48 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x48, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 48 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x4e, 0xca, }, 6, 0, "", "", +"62 f6 7d 08 4e ca \tvrsqrtph %xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 08 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x08, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 08 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x4e, 0xca, }, 6, 0, "", "", +"62 f6 7d 28 4e ca \tvrsqrtph %ymm2,%ymm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 28 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x28, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 28 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x4f, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 4f cb \tvrsqrtsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x4f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 4f 8c c8 78 56 34 12 \tvrsqrtsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x4f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, 
"", "", +"67 62 f6 6d 08 4f 8c c8 78 56 34 12 \tvrsqrtsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x2c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 2c cb \tvscalefph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x2c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 2c cb \tvscalefph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x2c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 2c cb \tvscalefph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x2d, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 2d cb \tvscalefsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x2d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 2d 8c c8 78 56 34 12 \tvscalefsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x2d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 2d 8c c8 78 56 34 12 \tvscalefsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x51, 
0xca, }, 6, 0, "", "", +"62 f5 7c 48 51 ca \tvsqrtph %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 48 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x51, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 51 ca \tvsqrtph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 08 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x51, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 51 ca \tvsqrtph %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 28 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x51, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 51 cb \tvsqrtsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 51 8c c8 78 56 34 12 \tvsqrtsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 51 8c c8 78 56 34 12 \tvsqrtsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5c, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 5c cb \tvsubph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 5c 8c c8 78 56 34 12 \tvsubph 
0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 48 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5c, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 5c cb \tvsubph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 08 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5c, 0xcb, }, 6, 0, "", "", +"62 f5 6c 28 5c cb \tvsubph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 28 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5c, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 5c cb \tvsubsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 5c 8c c8 78 56 34 12 \tvsubsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 5c 8c c8 78 56 34 12 \tvsubsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x2e, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 2e ca \tvucomish %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x2e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 2e 8c c8 78 56 34 12 \tvucomish 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x2e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 08 2e 8c c8 78 56 34 12 
\tvucomish 0x12345678(%eax,%ecx,8),%xmm1",}, {{0xf3, 0x0f, 0x3a, 0xf0, 0xc0, 0x00, }, 6, 0, "", "", "f3 0f 3a f0 c0 00 \threset $0x0",}, {{0x0f, 0x01, 0xe8, }, 3, 0, "", "", diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-src.c b/tools/perf/arch/x86/tests/insn-x86-dat-src.c index 425db6a1b580a..a391464c8dee7 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-src.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-src.c @@ -1940,6 +1940,694 @@ int main(void) asm volatile("testui"); asm volatile("uiret"); + /* AVX512-FP16 */ + + asm volatile("vaddph %zmm3, %zmm2, %zmm1"); + asm volatile("vaddph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vaddph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vaddph %xmm3, %xmm2, %xmm1"); + asm volatile("vaddph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vaddph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vaddph %ymm3, %ymm2, %ymm1"); + asm volatile("vaddph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vaddph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vaddsh %xmm3, %xmm2, %xmm1"); + asm volatile("vaddsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vaddsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcmpph $0x12, %zmm3, %zmm2, %k5"); + asm volatile("vcmpph $0x12, 0x12345678(%rax,%rcx,8), %zmm2, %k5"); + asm volatile("vcmpph $0x12, 0x12345678(%eax,%ecx,8), %zmm2, %k5"); + asm volatile("vcmpph $0x12, %xmm3, %xmm2, %k5"); + asm volatile("vcmpph $0x12, 0x12345678(%rax,%rcx,8), %xmm2, %k5"); + asm volatile("vcmpph $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %k5"); + asm volatile("vcmpph $0x12, %ymm3, %ymm2, %k5"); + asm volatile("vcmpph $0x12, 0x12345678(%rax,%rcx,8), %ymm2, %k5"); + asm volatile("vcmpph $0x12, 0x12345678(%eax,%ecx,8), %ymm2, %k5"); + asm volatile("vcmpsh $0x12, %xmm3, %xmm2, %k5"); + asm volatile("vcmpsh $0x12, 0x12345678(%rax,%rcx,8), %xmm2, %k5"); + asm volatile("vcmpsh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %k5"); + asm 
volatile("vcomish %xmm2, %xmm1"); + asm volatile("vcomish 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcomish 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtdq2ph %zmm2, %ymm1"); + asm volatile("vcvtdq2ph 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtdq2ph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtdq2ph %xmm2, %xmm1"); + asm volatile("vcvtdq2ph %ymm2, %xmm1"); + asm volatile("vcvtpd2ph %zmm2, %xmm1"); + asm volatile("vcvtpd2ph %xmm2, %xmm1"); + asm volatile("vcvtpd2ph %ymm2, %xmm1"); + asm volatile("vcvtph2dq %ymm2, %zmm1"); + asm volatile("vcvtph2dq 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2dq %xmm2, %xmm1"); + asm volatile("vcvtph2dq 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2dq %xmm2, %ymm1"); + asm volatile("vcvtph2dq 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2pd %xmm2, %zmm1"); + asm volatile("vcvtph2pd 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2pd %xmm2, %xmm1"); + asm volatile("vcvtph2pd 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2pd %xmm2, %ymm1"); + asm volatile("vcvtph2pd 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2ps %ymm2, %zmm1"); + asm volatile("vcvtph2ps 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2ps %xmm2, %xmm1"); + asm volatile("vcvtph2ps 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2ps %xmm2, %ymm1"); + asm volatile("vcvtph2ps 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %ymm1"); + asm 
volatile("vcvtph2ps %xmm2, %xmm1"); + asm volatile("vcvtph2ps 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2ps %xmm2, %ymm1"); + asm volatile("vcvtph2ps 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2psx %ymm2, %zmm1"); + asm volatile("vcvtph2psx 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvtph2psx 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2psx %xmm2, %xmm1"); + asm volatile("vcvtph2psx 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtph2psx 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2psx %xmm2, %ymm1"); + asm volatile("vcvtph2psx 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtph2psx 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2qq %xmm2, %zmm1"); + asm volatile("vcvtph2qq 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2qq %xmm2, %xmm1"); + asm volatile("vcvtph2qq 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2qq %xmm2, %ymm1"); + asm volatile("vcvtph2qq 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2udq %ymm2, %zmm1"); + asm volatile("vcvtph2udq 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2udq %xmm2, %xmm1"); + asm volatile("vcvtph2udq 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2udq %xmm2, %ymm1"); + asm volatile("vcvtph2udq 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2uqq %xmm2, %zmm1"); + asm volatile("vcvtph2uqq 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2uqq %xmm2, 
%xmm1"); + asm volatile("vcvtph2uqq 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2uqq %xmm2, %ymm1"); + asm volatile("vcvtph2uqq 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2uw %zmm2, %zmm1"); + asm volatile("vcvtph2uw 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2uw %xmm2, %xmm1"); + asm volatile("vcvtph2uw 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2uw %ymm2, %ymm1"); + asm volatile("vcvtph2uw 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2w %zmm2, %zmm1"); + asm volatile("vcvtph2w 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2w %xmm2, %xmm1"); + asm volatile("vcvtph2w 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2w %ymm2, %ymm1"); + asm volatile("vcvtph2w 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtps2ph $0x12, %zmm1, 0x12345678(%rax,%rcx,8)"); + asm volatile("vcvtps2ph $0x12, %zmm1, 0x12345678(%eax,%ecx,8)"); + asm volatile("vcvtps2ph $0x12, %zmm2, %ymm1"); + asm volatile("vcvtps2ph $0x12, %ymm1, 0x12345678(%rax,%rcx,8)"); + asm volatile("vcvtps2ph $0x12, %ymm1, 0x12345678(%eax,%ecx,8)"); + asm volatile("vcvtps2ph $0x12, %xmm1, 0x12345678(%rax,%rcx,8)"); + asm volatile("vcvtps2ph $0x12, %xmm1, 0x12345678(%eax,%ecx,8)"); + asm volatile("vcvtps2ph $0x12, %xmm2, %xmm1"); + asm volatile("vcvtps2ph $0x12, %ymm2, %xmm1"); + asm volatile("vcvtps2ph $0x12, %ymm2, %xmm1"); + asm volatile("vcvtps2ph $0x12, %ymm2, 0x12345678(%rax,%rcx,8)"); + asm volatile("vcvtps2ph $0x12, %ymm2, 0x12345678(%eax,%ecx,8)"); + asm 
volatile("vcvtps2ph $0x12, %xmm2, %xmm1"); + asm volatile("vcvtps2ph $0x12, %xmm2, 0x12345678(%rax,%rcx,8)"); + asm volatile("vcvtps2ph $0x12, %xmm2, 0x12345678(%eax,%ecx,8)"); + asm volatile("vcvtps2phx %zmm2, %ymm1"); + asm volatile("vcvtps2phx 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtps2phx 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtps2phx %xmm2, %xmm1"); + asm volatile("vcvtps2phx %ymm2, %xmm1"); + asm volatile("vcvtqq2ph %zmm2, %xmm1"); + asm volatile("vcvtqq2ph %xmm2, %xmm1"); + asm volatile("vcvtqq2ph %ymm2, %xmm1"); + asm volatile("vcvtsd2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtsh2sd 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtsh2si 0x12345678(%eax,%ecx,8), %eax"); + asm volatile("vcvtsh2si 0x12345678(%eax,%ecx,8), %rax"); + asm volatile("vcvtsh2ss 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtsh2usi %xmm1, %eax"); + asm volatile("vcvtsh2usi 0x12345678(%rax,%rcx,8), %eax"); + asm volatile("vcvtsh2usi 0x12345678(%eax,%ecx,8), %eax"); + asm volatile("vcvtsh2usi %xmm1, %rax"); + asm volatile("vcvtsh2usi 0x12345678(%rax,%rcx,8), %rax"); + asm volatile("vcvtsh2usi 0x12345678(%eax,%ecx,8), %rax"); + asm volatile("vcvtsi2sh %eax, %xmm2, %xmm1"); + asm volatile("vcvtsi2sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vcvtsi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtsi2sh %rax, %xmm2, %xmm1"); + asm volatile("vcvtsi2sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vcvtsi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtss2sh %xmm3, %xmm2, %xmm1"); + asm volatile("vcvtss2sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vcvtss2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvttph2dq %ymm2, %zmm1"); + asm volatile("vcvttph2dq 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2dq %xmm2, %xmm1"); + asm volatile("vcvttph2dq 
0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2dq %xmm2, %ymm1"); + asm volatile("vcvttph2dq 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttph2qq %xmm2, %zmm1"); + asm volatile("vcvttph2qq 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2qq %xmm2, %xmm1"); + asm volatile("vcvttph2qq 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2qq %xmm2, %ymm1"); + asm volatile("vcvttph2qq 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttph2udq %ymm2, %zmm1"); + asm volatile("vcvttph2udq 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2udq %xmm2, %xmm1"); + asm volatile("vcvttph2udq 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2udq %xmm2, %ymm1"); + asm volatile("vcvttph2udq 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttph2uqq %xmm2, %zmm1"); + asm volatile("vcvttph2uqq 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2uqq %xmm2, %xmm1"); + asm volatile("vcvttph2uqq 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2uqq %xmm2, %ymm1"); + asm volatile("vcvttph2uqq 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttph2uw %zmm2, %zmm1"); + asm volatile("vcvttph2uw 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2uw %xmm2, %xmm1"); + asm volatile("vcvttph2uw 
0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2uw %ymm2, %ymm1"); + asm volatile("vcvttph2uw 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttph2w %zmm2, %zmm1"); + asm volatile("vcvttph2w 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2w %xmm2, %xmm1"); + asm volatile("vcvttph2w 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2w %ymm2, %ymm1"); + asm volatile("vcvttph2w 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttsh2si %xmm1, %eax"); + asm volatile("vcvttsh2si 0x12345678(%rax,%rcx,8), %eax"); + asm volatile("vcvttsh2si 0x12345678(%eax,%ecx,8), %eax"); + asm volatile("vcvttsh2si %xmm1, %rax"); + asm volatile("vcvttsh2si 0x12345678(%rax,%rcx,8), %rax"); + asm volatile("vcvttsh2si 0x12345678(%eax,%ecx,8), %rax"); + asm volatile("vcvttsh2usi %xmm1, %eax"); + asm volatile("vcvttsh2usi 0x12345678(%rax,%rcx,8), %eax"); + asm volatile("vcvttsh2usi 0x12345678(%eax,%ecx,8), %eax"); + asm volatile("vcvttsh2usi %xmm1, %rax"); + asm volatile("vcvttsh2usi 0x12345678(%rax,%rcx,8), %rax"); + asm volatile("vcvttsh2usi 0x12345678(%eax,%ecx,8), %rax"); + asm volatile("vcvtudq2ph %zmm2, %ymm1"); + asm volatile("vcvtudq2ph 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtudq2ph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtudq2ph %xmm2, %xmm1"); + asm volatile("vcvtudq2ph %ymm2, %xmm1"); + asm volatile("vcvtuqq2ph %zmm2, %xmm1"); + asm volatile("vcvtuqq2ph %xmm2, %xmm1"); + asm volatile("vcvtuqq2ph %ymm2, %xmm1"); + asm volatile("vcvtusi2sh %eax, %xmm2, %xmm1"); + asm volatile("vcvtusi2sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vcvtusi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtusi2sh %rax, 
%xmm2, %xmm1"); + asm volatile("vcvtusi2sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vcvtusi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtuw2ph %zmm2, %zmm1"); + asm volatile("vcvtuw2ph 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvtuw2ph 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtuw2ph %xmm2, %xmm1"); + asm volatile("vcvtuw2ph 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtuw2ph 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtuw2ph %ymm2, %ymm1"); + asm volatile("vcvtuw2ph 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtuw2ph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtw2ph %zmm2, %zmm1"); + asm volatile("vcvtw2ph 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtw2ph %xmm2, %xmm1"); + asm volatile("vcvtw2ph 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtw2ph %ymm2, %ymm1"); + asm volatile("vcvtw2ph 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vdivph %zmm3, %zmm2, %zmm1"); + asm volatile("vdivph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vdivph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vdivph %xmm3, %xmm2, %xmm1"); + asm volatile("vdivph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vdivph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vdivph %ymm3, %ymm2, %ymm1"); + asm volatile("vdivph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vdivph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vdivsh %xmm3, %xmm2, %xmm1"); + asm volatile("vdivsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vdivsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfcmaddcph %zmm3, %zmm2, %zmm1"); + asm volatile("vfcmaddcph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + 
asm volatile("vfcmaddcph %xmm3, %xmm2, %xmm1"); + asm volatile("vfcmaddcph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfcmaddcph %ymm3, %ymm2, %ymm1"); + asm volatile("vfcmaddcph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfcmaddcsh %xmm3, %xmm2, %xmm1"); + asm volatile("vfcmaddcsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfcmaddcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfcmulcph %zmm3, %zmm2, %zmm1"); + asm volatile("vfcmulcph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfcmulcph %xmm3, %xmm2, %xmm1"); + asm volatile("vfcmulcph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfcmulcph %ymm3, %ymm2, %ymm1"); + asm volatile("vfcmulcph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfcmulcsh %xmm3, %xmm2, %xmm1"); + asm volatile("vfcmulcsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfcmulcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmadd132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmadd132ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmadd132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmadd132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmadd132sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmadd132sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm 
volatile("vfmadd132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmadd213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmadd213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmadd213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmadd213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmadd213sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmadd213sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmadd213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmadd231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmadd231ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmadd231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmadd231ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmadd231sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmadd231sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmadd231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmaddcph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmaddcph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmaddcph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmaddcph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmaddcph %ymm3, %ymm2, %ymm1"); + asm 
volatile("vfmaddcph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmaddcsh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmaddcsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmaddcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmaddsub132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmaddsub132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmaddsub132ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmaddsub132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmaddsub132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmaddsub132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmaddsub213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmaddsub213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmaddsub213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmaddsub213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmaddsub213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmaddsub213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmaddsub231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmaddsub231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmaddsub231ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmaddsub231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmaddsub231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmaddsub231ph 
0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsub132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsub132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsub132ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsub132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsub132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsub132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsub132sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsub132sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmsub132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsub213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsub213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsub213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsub213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsub213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsub213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsub213sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsub213sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmsub213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsub231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsub231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsub231ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsub231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %xmm2, 
%xmm1"); + asm volatile("vfmsub231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsub231ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsub231sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsub231sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmsub231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsubadd132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsubadd132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsubadd132ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsubadd132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsubadd132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsubadd132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsubadd213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsubadd213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsubadd213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsubadd213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsubadd213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsubadd213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsubadd231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsubadd231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsubadd231ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsubadd231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm 
volatile("vfmsubadd231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsubadd231ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmulcph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmulcph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmulcph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmulcph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmulcph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmulcph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmulcsh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmulcsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmulcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmadd132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfnmadd132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfnmadd132ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmadd132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfnmadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmadd132sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd132sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmadd213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfnmadd213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm 
volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmadd213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmadd213sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd213sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmadd231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfnmadd231ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmadd231ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmadd231sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd231sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmsub132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfnmsub132ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmsub132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmsub132sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub132sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm 
volatile("vfnmsub213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmsub213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfnmsub213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmsub213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmsub213sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub213sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmsub231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfnmsub231ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmsub231ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmsub231sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub231sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfpclassph $0x12, %zmm1, %k5"); + asm volatile("vfpclassph $0x12, %xmm1, %k5"); + asm volatile("vfpclassph $0x12, %ymm1, %k5"); + asm volatile("vfpclasssh $0x12, %xmm1, %k5"); + asm volatile("vfpclasssh $0x12, 0x12345678(%rax,%rcx,8), %k5"); + asm volatile("vfpclasssh $0x12, 0x12345678(%eax,%ecx,8), %k5"); + asm volatile("vgetexpph %zmm2, %zmm1"); + asm volatile("vgetexpph 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vgetexpph 
0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vgetexpph %xmm2, %xmm1"); + asm volatile("vgetexpph 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vgetexpph 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vgetexpph %ymm2, %ymm1"); + asm volatile("vgetexpph 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vgetexpph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vgetexpsh %xmm3, %xmm2, %xmm1"); + asm volatile("vgetexpsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vgetexpsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vgetmantph $0x12, %zmm2, %zmm1"); + asm volatile("vgetmantph $0x12, 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vgetmantph $0x12, %xmm2, %xmm1"); + asm volatile("vgetmantph $0x12, 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vgetmantph $0x12, %ymm2, %ymm1"); + asm volatile("vgetmantph $0x12, 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vgetmantsh $0x12, %xmm3, %xmm2, %xmm1"); + asm volatile("vgetmantsh $0x12, 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vgetmantsh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vmaxph %zmm3, %zmm2, %zmm1"); + asm volatile("vmaxph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vmaxph %xmm3, %xmm2, %xmm1"); + asm volatile("vmaxph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vmaxph %ymm3, %ymm2, %ymm1"); + asm volatile("vmaxph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vmaxsh %xmm3, %xmm2, %xmm1"); + asm volatile("vmaxsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vmaxsh 0x12345678(%eax,%ecx,8), %xmm2, 
%xmm1"); + asm volatile("vminph %zmm3, %zmm2, %zmm1"); + asm volatile("vminph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vminph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vminph %xmm3, %xmm2, %xmm1"); + asm volatile("vminph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vminph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vminph %ymm3, %ymm2, %ymm1"); + asm volatile("vminph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vminph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vminsh %xmm3, %xmm2, %xmm1"); + asm volatile("vminsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vminsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vmovsh %xmm1, 0x12345678(%rax,%rcx,8)"); + asm volatile("vmovsh %xmm1, 0x12345678(%eax,%ecx,8)"); + asm volatile("vmovsh 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vmovsh 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vmovsh %xmm3, %xmm2, %xmm1"); + asm volatile("vmovw %xmm1, %eax"); + asm volatile("vmovw %xmm1, 0x12345678(%rax,%rcx,8)"); + asm volatile("vmovw %xmm1, 0x12345678(%eax,%ecx,8)"); + asm volatile("vmovw %eax, %xmm1"); + asm volatile("vmovw 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vmovw 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vmulph %zmm3, %zmm2, %zmm1"); + asm volatile("vmulph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vmulph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vmulph %xmm3, %xmm2, %xmm1"); + asm volatile("vmulph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vmulph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vmulph %ymm3, %ymm2, %ymm1"); + asm volatile("vmulph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vmulph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vmulsh %xmm3, %xmm2, %xmm1"); + asm volatile("vmulsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vmulsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vrcpph 
%zmm2, %zmm1"); + asm volatile("vrcpph 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vrcpph %xmm2, %xmm1"); + asm volatile("vrcpph 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vrcpph %ymm2, %ymm1"); + asm volatile("vrcpph 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vrcpsh %xmm3, %xmm2, %xmm1"); + asm volatile("vrcpsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vrcpsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vreduceph $0x12, %zmm2, %zmm1"); + asm volatile("vreduceph $0x12, 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vreduceph $0x12, %xmm2, %xmm1"); + asm volatile("vreduceph $0x12, 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vreduceph $0x12, %ymm2, %ymm1"); + asm volatile("vreduceph $0x12, 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vreducesh $0x12, %xmm3, %xmm2, %xmm1"); + asm volatile("vreducesh $0x12, 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vreducesh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vrndscaleph $0x12, %zmm2, %zmm1"); + asm volatile("vrndscaleph $0x12, 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vrndscaleph $0x12, %xmm2, %xmm1"); + asm volatile("vrndscaleph $0x12, 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vrndscaleph $0x12, %ymm2, %ymm1"); + asm volatile("vrndscaleph $0x12, 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vrndscalesh $0x12, %xmm3, %xmm2, %xmm1"); + asm 
volatile("vrndscalesh $0x12, 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vrndscalesh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vrsqrtph %zmm2, %zmm1"); + asm volatile("vrsqrtph 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vrsqrtph %xmm2, %xmm1"); + asm volatile("vrsqrtph 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vrsqrtph %ymm2, %ymm1"); + asm volatile("vrsqrtph 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vrsqrtsh %xmm3, %xmm2, %xmm1"); + asm volatile("vrsqrtsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vrsqrtsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vscalefph %zmm3, %zmm2, %zmm1"); + asm volatile("vscalefph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vscalefph %xmm3, %xmm2, %xmm1"); + asm volatile("vscalefph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vscalefph %ymm3, %ymm2, %ymm1"); + asm volatile("vscalefph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vscalefsh %xmm3, %xmm2, %xmm1"); + asm volatile("vscalefsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vscalefsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vsqrtph %zmm2, %zmm1"); + asm volatile("vsqrtph 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vsqrtph 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vsqrtph %xmm2, %xmm1"); + asm volatile("vsqrtph 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vsqrtph 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vsqrtph %ymm2, %ymm1"); + asm volatile("vsqrtph 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vsqrtph 0x12345678(%eax,%ecx,8), 
%ymm1"); + asm volatile("vsqrtsh %xmm3, %xmm2, %xmm1"); + asm volatile("vsqrtsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vsqrtsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vsubph %zmm3, %zmm2, %zmm1"); + asm volatile("vsubph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vsubph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vsubph %xmm3, %xmm2, %xmm1"); + asm volatile("vsubph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vsubph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vsubph %ymm3, %ymm2, %ymm1"); + asm volatile("vsubph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vsubph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vsubsh %xmm3, %xmm2, %xmm1"); + asm volatile("vsubsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vsubsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vucomish %xmm2, %xmm1"); + asm volatile("vucomish 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vucomish 0x12345678(%eax,%ecx,8), %xmm1"); + #else /* #ifdef __x86_64__ */ /* bound r32, mem (same op code as EVEX prefix) */ @@ -3700,6 +4388,464 @@ int main(void) asm volatile("notrack bnd jmp *(0x12345678)"); /* Expecting: jmp indirect 0 */ asm volatile("notrack bnd jmp *0x12345678(%eax,%ecx,8)"); /* Expecting: jmp indirect 0 */ + /* AVX512-FP16 */ + + asm volatile("vaddph %zmm3, %zmm2, %zmm1"); + asm volatile("vaddph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vaddph %xmm3, %xmm2, %xmm1"); + asm volatile("vaddph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vaddph %ymm3, %ymm2, %ymm1"); + asm volatile("vaddph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vaddsh %xmm3, %xmm2, %xmm1"); + asm volatile("vaddsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcmpph $0x12, %zmm3, %zmm2, %k5"); + asm volatile("vcmpph $0x12, 0x12345678(%eax,%ecx,8), %zmm2, %k5"); + asm volatile("vcmpph $0x12, %xmm3, %xmm2, %k5"); + asm volatile("vcmpph $0x12, 
0x12345678(%eax,%ecx,8), %xmm2, %k5"); + asm volatile("vcmpph $0x12, %ymm3, %ymm2, %k5"); + asm volatile("vcmpph $0x12, 0x12345678(%eax,%ecx,8), %ymm2, %k5"); + asm volatile("vcmpsh $0x12, %xmm3, %xmm2, %k5"); + asm volatile("vcmpsh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %k5"); + asm volatile("vcomish %xmm2, %xmm1"); + asm volatile("vcomish 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtdq2ph %zmm2, %ymm1"); + asm volatile("vcvtdq2ph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtdq2ph %xmm2, %xmm1"); + asm volatile("vcvtdq2ph %ymm2, %xmm1"); + asm volatile("vcvtpd2ph %zmm2, %xmm1"); + asm volatile("vcvtpd2ph %xmm2, %xmm1"); + asm volatile("vcvtpd2ph %ymm2, %xmm1"); + asm volatile("vcvtph2dq %ymm2, %zmm1"); + asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2dq %xmm2, %xmm1"); + asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2dq %xmm2, %ymm1"); + asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2pd %xmm2, %zmm1"); + asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2pd %xmm2, %xmm1"); + asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2pd %xmm2, %ymm1"); + asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2ps %ymm2, %zmm1"); + asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2ps %xmm2, %xmm1"); + asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2ps %xmm2, %ymm1"); + asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2ps %xmm2, %xmm1"); + asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2ps %xmm2, %ymm1"); + asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2psx %ymm2, %zmm1"); + asm volatile("vcvtph2psx 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2psx %xmm2, %xmm1"); + asm volatile("vcvtph2psx 
0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2psx %xmm2, %ymm1"); + asm volatile("vcvtph2psx 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2qq %xmm2, %zmm1"); + asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2qq %xmm2, %xmm1"); + asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2qq %xmm2, %ymm1"); + asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2udq %ymm2, %zmm1"); + asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2udq %xmm2, %xmm1"); + asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2udq %xmm2, %ymm1"); + asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2uqq %xmm2, %zmm1"); + asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2uqq %xmm2, %xmm1"); + asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2uqq %xmm2, %ymm1"); + asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2uw %zmm2, %zmm1"); + asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2uw %xmm2, %xmm1"); + asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2uw %ymm2, %ymm1"); + asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2w %zmm2, %zmm1"); + asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2w %xmm2, %xmm1"); + asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2w %ymm2, %ymm1"); + asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtps2ph $0x12, %zmm1, 0x12345678(%eax,%ecx,8)"); + asm volatile("vcvtps2ph $0x12, %zmm2, %ymm1"); + asm volatile("vcvtps2ph $0x12, %ymm1, 0x12345678(%eax,%ecx,8)"); + asm volatile("vcvtps2ph $0x12, %xmm1, 0x12345678(%eax,%ecx,8)"); + asm volatile("vcvtps2ph $0x12, %xmm2, %xmm1"); + asm volatile("vcvtps2ph 
$0x12, %ymm2, %xmm1"); + asm volatile("vcvtps2ph $0x12, %ymm2, %xmm1"); + asm volatile("vcvtps2ph $0x12, %ymm2, 0x12345678(%eax,%ecx,8)"); + asm volatile("vcvtps2ph $0x12, %xmm2, %xmm1"); + asm volatile("vcvtps2ph $0x12, %xmm2, 0x12345678(%eax,%ecx,8)"); + asm volatile("vcvtps2phx %zmm2, %ymm1"); + asm volatile("vcvtps2phx 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtps2phx %xmm2, %xmm1"); + asm volatile("vcvtps2phx %ymm2, %xmm1"); + asm volatile("vcvtqq2ph %zmm2, %xmm1"); + asm volatile("vcvtqq2ph %xmm2, %xmm1"); + asm volatile("vcvtqq2ph %ymm2, %xmm1"); + asm volatile("vcvtsd2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtsh2sd 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtsh2si 0x12345678(%eax,%ecx,8), %eax"); + asm volatile("vcvtsh2ss 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtsh2usi %xmm1, %eax"); + asm volatile("vcvtsh2usi 0x12345678(%eax,%ecx,8), %eax"); + asm volatile("vcvtsi2sh %eax, %xmm2, %xmm1"); + asm volatile("vcvtsi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtsi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtss2sh %xmm3, %xmm2, %xmm1"); + asm volatile("vcvtss2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvttph2dq %ymm2, %zmm1"); + asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2dq %xmm2, %xmm1"); + asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2dq %xmm2, %ymm1"); + asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttph2qq %xmm2, %zmm1"); + asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2qq %xmm2, %xmm1"); + asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2qq %xmm2, %ymm1"); + asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttph2udq %ymm2, %zmm1"); + asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2udq %xmm2, 
%xmm1"); + asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2udq %xmm2, %ymm1"); + asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttph2uqq %xmm2, %zmm1"); + asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2uqq %xmm2, %xmm1"); + asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2uqq %xmm2, %ymm1"); + asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttph2uw %zmm2, %zmm1"); + asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2uw %xmm2, %xmm1"); + asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2uw %ymm2, %ymm1"); + asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttph2w %zmm2, %zmm1"); + asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2w %xmm2, %xmm1"); + asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2w %ymm2, %ymm1"); + asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttsh2si %xmm1, %eax"); + asm volatile("vcvttsh2si 0x12345678(%eax,%ecx,8), %eax"); + asm volatile("vcvttsh2usi %xmm1, %eax"); + asm volatile("vcvttsh2usi 0x12345678(%eax,%ecx,8), %eax"); + asm volatile("vcvtudq2ph %zmm2, %ymm1"); + asm volatile("vcvtudq2ph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtudq2ph %xmm2, %xmm1"); + asm volatile("vcvtudq2ph %ymm2, %xmm1"); + asm volatile("vcvtuqq2ph %zmm2, %xmm1"); + asm volatile("vcvtuqq2ph %xmm2, %xmm1"); + asm volatile("vcvtuqq2ph %ymm2, %xmm1"); + asm volatile("vcvtusi2sh %eax, %xmm2, %xmm1"); + asm volatile("vcvtusi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtusi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtuw2ph %zmm2, %zmm1"); + asm volatile("vcvtuw2ph 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtuw2ph %xmm2, %xmm1"); + asm volatile("vcvtuw2ph 
0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtuw2ph %ymm2, %ymm1"); + asm volatile("vcvtuw2ph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtw2ph %zmm2, %zmm1"); + asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtw2ph %xmm2, %xmm1"); + asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtw2ph %ymm2, %ymm1"); + asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vdivph %zmm3, %zmm2, %zmm1"); + asm volatile("vdivph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vdivph %xmm3, %xmm2, %xmm1"); + asm volatile("vdivph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vdivph %ymm3, %ymm2, %ymm1"); + asm volatile("vdivph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vdivsh %xmm3, %xmm2, %xmm1"); + asm volatile("vdivsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfcmaddcph %zmm3, %zmm2, %zmm1"); + asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfcmaddcph %xmm3, %xmm2, %xmm1"); + asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfcmaddcph %ymm3, %ymm2, %ymm1"); + asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfcmaddcsh %xmm3, %xmm2, %xmm1"); + asm volatile("vfcmaddcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfcmulcph %zmm3, %zmm2, %zmm1"); + asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfcmulcph %xmm3, %xmm2, %xmm1"); + asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfcmulcph %ymm3, %ymm2, %ymm1"); + asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfcmulcsh %xmm3, %xmm2, %xmm1"); + asm volatile("vfcmulcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmadd132ph %xmm3, %xmm2, %xmm1"); + asm 
volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmadd132sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmadd132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmadd213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmadd213sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmadd213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmadd231ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmadd231sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmadd231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmaddcph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmaddcph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmaddcph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmaddcsh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmaddcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmaddsub132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmaddsub132ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm 
volatile("vfmaddsub132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmaddsub213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmaddsub213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmaddsub213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmaddsub231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmaddsub231ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmaddsub231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsub132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsub132ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsub132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsub132sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsub132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsub213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsub213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsub213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsub213sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsub213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsub231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsub231ph %xmm3, %xmm2, 
%xmm1"); + asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsub231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsub231sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsub231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsubadd132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsubadd132ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsubadd132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsubadd213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsubadd213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsubadd213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsubadd231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsubadd231ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsubadd231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmulcph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmulcph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmulcph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmulcsh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmulcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmadd132ph 
0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfnmadd132ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmadd132sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfnmadd213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmadd213sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfnmadd231ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmadd231sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfnmsub132ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmsub132sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm 
volatile("vfnmsub213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmsub213sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfnmsub231ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmsub231sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfpclassph $0x12, %zmm1, %k5"); + asm volatile("vfpclassph $0x12, %xmm1, %k5"); + asm volatile("vfpclassph $0x12, %ymm1, %k5"); + asm volatile("vfpclasssh $0x12, %xmm1, %k5"); + asm volatile("vfpclasssh $0x12, 0x12345678(%eax,%ecx,8), %k5"); + asm volatile("vgetexpph %zmm2, %zmm1"); + asm volatile("vgetexpph 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vgetexpph %xmm2, %xmm1"); + asm volatile("vgetexpph 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vgetexpph %ymm2, %ymm1"); + asm volatile("vgetexpph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vgetexpsh %xmm3, %xmm2, %xmm1"); + asm volatile("vgetexpsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vgetmantph $0x12, %zmm2, %zmm1"); + asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vgetmantph $0x12, %xmm2, %xmm1"); + asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vgetmantph $0x12, %ymm2, %ymm1"); + asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vgetmantsh $0x12, %xmm3, %xmm2, %xmm1"); + asm volatile("vgetmantsh $0x12, 0x12345678(%eax,%ecx,8), 
%xmm2, %xmm1"); + asm volatile("vmaxph %zmm3, %zmm2, %zmm1"); + asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vmaxph %xmm3, %xmm2, %xmm1"); + asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vmaxph %ymm3, %ymm2, %ymm1"); + asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vmaxsh %xmm3, %xmm2, %xmm1"); + asm volatile("vmaxsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vminph %zmm3, %zmm2, %zmm1"); + asm volatile("vminph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vminph %xmm3, %xmm2, %xmm1"); + asm volatile("vminph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vminph %ymm3, %ymm2, %ymm1"); + asm volatile("vminph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vminsh %xmm3, %xmm2, %xmm1"); + asm volatile("vminsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vmovsh %xmm1, 0x12345678(%eax,%ecx,8)"); + asm volatile("vmovsh 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vmovsh %xmm3, %xmm2, %xmm1"); + asm volatile("vmovw %xmm1, %eax"); + asm volatile("vmovw %xmm1, 0x12345678(%eax,%ecx,8)"); + asm volatile("vmovw %eax, %xmm1"); + asm volatile("vmovw 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vmulph %zmm3, %zmm2, %zmm1"); + asm volatile("vmulph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vmulph %xmm3, %xmm2, %xmm1"); + asm volatile("vmulph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vmulph %ymm3, %ymm2, %ymm1"); + asm volatile("vmulph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vmulsh %xmm3, %xmm2, %xmm1"); + asm volatile("vmulsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vrcpph %zmm2, %zmm1"); + asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vrcpph %xmm2, %xmm1"); + asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vrcpph %ymm2, %ymm1"); + asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vrcpsh %xmm3, 
%xmm2, %xmm1"); + asm volatile("vrcpsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vreduceph $0x12, %zmm2, %zmm1"); + asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vreduceph $0x12, %xmm2, %xmm1"); + asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vreduceph $0x12, %ymm2, %ymm1"); + asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vreducesh $0x12, %xmm3, %xmm2, %xmm1"); + asm volatile("vreducesh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vrndscaleph $0x12, %zmm2, %zmm1"); + asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vrndscaleph $0x12, %xmm2, %xmm1"); + asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vrndscaleph $0x12, %ymm2, %ymm1"); + asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vrndscalesh $0x12, %xmm3, %xmm2, %xmm1"); + asm volatile("vrndscalesh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vrsqrtph %zmm2, %zmm1"); + asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vrsqrtph %xmm2, %xmm1"); + asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vrsqrtph %ymm2, %ymm1"); + asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vrsqrtsh %xmm3, %xmm2, %xmm1"); + asm volatile("vrsqrtsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vscalefph %zmm3, %zmm2, %zmm1"); + asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vscalefph %xmm3, %xmm2, %xmm1"); + asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vscalefph %ymm3, %ymm2, %ymm1"); + asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vscalefsh %xmm3, %xmm2, %xmm1"); + asm volatile("vscalefsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vsqrtph %zmm2, %zmm1"); + asm volatile("vsqrtph 
0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vsqrtph %xmm2, %xmm1"); + asm volatile("vsqrtph 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vsqrtph %ymm2, %ymm1"); + asm volatile("vsqrtph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vsqrtsh %xmm3, %xmm2, %xmm1"); + asm volatile("vsqrtsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vsubph %zmm3, %zmm2, %zmm1"); + asm volatile("vsubph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vsubph %xmm3, %xmm2, %xmm1"); + asm volatile("vsubph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vsubph %ymm3, %ymm2, %ymm1"); + asm volatile("vsubph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vsubsh %xmm3, %xmm2, %xmm1"); + asm volatile("vsubsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vucomish %xmm2, %xmm1"); + asm volatile("vucomish 0x12345678(%eax,%ecx,8), %xmm1"); + #endif /* #ifndef __x86_64__ */ /* Prediction history reset */ -- GitLab From 16273fa4f3a2dc2c64dd8a28fe30f255a4de0e4c Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 2 Dec 2021 11:50:29 +0200 Subject: [PATCH 0008/1586] x86/insn: Add AVX512-FP16 instructions to the x86 instruction decoder The x86 instruction decoder is used for both kernel instructions and user space instructions (e.g. uprobes, perf tools Intel PT), so it is good to update it with new instructions. Add AVX512-FP16 instructions to x86 instruction decoder. Note the EVEX map field is extended by 1 bit, and most instructions are in map 5 and map 6. 
Reference: Intel AVX512-FP16 Architecture Specification June 2021 Revision 1.0 Document Number: 347407-001US Example using perf tools' x86 instruction decoder test: $ perf test -v "x86 instruction decoder" |& grep vfcmaddcph | head -2 Decoded ok: 62 f6 6f 48 56 cb vfcmaddcph %zmm3,%zmm2,%zmm1 Decoded ok: 62 f6 6f 48 56 8c c8 78 56 34 12 vfcmaddcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1 Signed-off-by: Adrian Hunter Signed-off-by: Borislav Petkov Acked-by: Arnaldo Carvalho de Melo Acked-by: Masami Hiramatsu Link: https://lore.kernel.org/r/20211202095029.2165714-7-adrian.hunter@intel.com --- arch/x86/include/asm/insn.h | 2 +- arch/x86/lib/x86-opcode-map.txt | 95 ++++++++++++++++++++++++--- tools/arch/x86/include/asm/insn.h | 2 +- tools/arch/x86/lib/x86-opcode-map.txt | 95 ++++++++++++++++++++++++--- 4 files changed, 176 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 05a6ab940f452..1b29f58f730fd 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -124,7 +124,7 @@ struct insn { #define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ #define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ /* VEX bit fields */ -#define X86_EVEX_M(vex) ((vex) & 0x03) /* EVEX Byte1 */ +#define X86_EVEX_M(vex) ((vex) & 0x07) /* EVEX Byte1 */ #define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ #define X86_VEX2_M 1 /* VEX2.M always 1 */ #define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 591797a931bfc..d12d1358f96d2 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -828,9 +828,9 @@ AVXcode: 3 05: vpermilpd Vx,Wx,Ib (66),(v) 06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v) 07: -08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo) +08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo) | vrndscaleph Vx,Wx,Ib (evo) 09: vroundpd Vx,Wx,Ib (66) | vrndscalepd 
Vx,Wx,Ib (66),(evo) -0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo) +0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo) | vrndscalesh Vx,Hx,Wx,Ib (evo) 0b: vroundsd Vsd,Wsd,Ib (66),(v1) | vrndscalesd Vx,Hx,Wx,Ib (66),(evo) 0c: vblendps Vx,Hx,Wx,Ib (66) 0d: vblendpd Vx,Hx,Wx,Ib (66) @@ -852,8 +852,8 @@ AVXcode: 3 22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1) 23: vshuff32x4/64x2 Vx,Hx,Wx,Ib (66),(ev) 25: vpternlogd/q Vx,Hx,Wx,Ib (66),(ev) -26: vgetmantps/d Vx,Wx,Ib (66),(ev) -27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev) +26: vgetmantps/d Vx,Wx,Ib (66),(ev) | vgetmantph Vx,Wx,Ib (ev) +27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev) | vgetmantsh Vx,Hx,Wx,Ib (ev) 30: kshiftrb/w Vk,Uk,Ib (66),(v) 31: kshiftrd/q Vk,Uk,Ib (66),(v) 32: kshiftlb/w Vk,Uk,Ib (66),(v) @@ -877,18 +877,19 @@ AVXcode: 3 51: vrangess/d Vx,Hx,Wx,Ib (66),(ev) 54: vfixupimmps/d Vx,Hx,Wx,Ib (66),(ev) 55: vfixupimmss/d Vx,Hx,Wx,Ib (66),(ev) -56: vreduceps/d Vx,Wx,Ib (66),(ev) -57: vreducess/d Vx,Hx,Wx,Ib (66),(ev) +56: vreduceps/d Vx,Wx,Ib (66),(ev) | vreduceph Vx,Wx,Ib (ev) +57: vreducess/d Vx,Hx,Wx,Ib (66),(ev) | vreducesh Vx,Hx,Wx,Ib (ev) 60: vpcmpestrm Vdq,Wdq,Ib (66),(v1) 61: vpcmpestri Vdq,Wdq,Ib (66),(v1) 62: vpcmpistrm Vdq,Wdq,Ib (66),(v1) 63: vpcmpistri Vdq,Wdq,Ib (66),(v1) -66: vfpclassps/d Vk,Wx,Ib (66),(ev) -67: vfpclassss/d Vk,Wx,Ib (66),(ev) +66: vfpclassps/d Vk,Wx,Ib (66),(ev) | vfpclassph Vx,Wx,Ib (ev) +67: vfpclassss/d Vk,Wx,Ib (66),(ev) | vfpclasssh Vx,Wx,Ib (ev) 70: vpshldw Vx,Hx,Wx,Ib (66),(ev) 71: vpshldd/q Vx,Hx,Wx,Ib (66),(ev) 72: vpshrdw Vx,Hx,Wx,Ib (66),(ev) 73: vpshrdd/q Vx,Hx,Wx,Ib (66),(ev) +c2: vcmpph Vx,Hx,Wx,Ib (ev) | vcmpsh Vx,Hx,Wx,Ib (F3),(ev) cc: sha1rnds4 Vdq,Wdq,Ib ce: vgf2p8affineqb Vx,Wx,Ib (66) cf: vgf2p8affineinvqb Vx,Wx,Ib (66) @@ -896,6 +897,84 @@ df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1) f0: RORX Gy,Ey,Ib (F2),(v) | HRESET Gv,Ib (F3),(000),(11B) EndTable +Table: EVEX map 5 +Referrer: +AVXcode: 5 +10: vmovsh Vx,Hx,Wx (F3),(ev) | vmovsh Vx,Wx 
(F3),(ev) +11: vmovsh Wx,Hx,Vx (F3),(ev) | vmovsh Wx,Vx (F3),(ev) +1d: vcvtps2phx Vx,Wx (66),(ev) | vcvtss2sh Vx,Hx,Wx (ev) +2a: vcvtsi2sh Vx,Hx,Wx (F3),(ev) +2c: vcvttsh2si Vx,Wx (F3),(ev) +2d: vcvtsh2si Vx,Wx (F3),(ev) +2e: vucomish Vx,Wx (ev) +2f: vcomish Vx,Wx (ev) +51: vsqrtph Vx,Wx (ev) | vsqrtsh Vx,Hx,Wx (F3),(ev) +58: vaddph Vx,Hx,Wx (ev) | vaddsh Vx,Hx,Wx (F3),(ev) +59: vmulph Vx,Hx,Wx (ev) | vmulsh Vx,Hx,Wx (F3),(ev) +5a: vcvtpd2ph Vx,Wx (66),(ev) | vcvtph2pd Vx,Wx (ev) | vcvtsd2sh Vx,Hx,Wx (F2),(ev) | vcvtsh2sd Vx,Hx,Wx (F3),(ev) +5b: vcvtdq2ph Vx,Wx (ev) | vcvtph2dq Vx,Wx (66),(ev) | vcvtqq2ph Vx,Wx (ev) | vcvttph2dq Vx,Wx (F3),(ev) +5c: vsubph Vx,Hx,Wx (ev) | vsubsh Vx,Hx,Wx (F3),(ev) +5d: vminph Vx,Hx,Wx (ev) | vminsh Vx,Hx,Wx (F3),(ev) +5e: vdivph Vx,Hx,Wx (ev) | vdivsh Vx,Hx,Wx (F3),(ev) +5f: vmaxph Vx,Hx,Wx (ev) | vmaxsh Vx,Hx,Wx (F3),(ev) +6e: vmovw Vx,Wx (66),(ev) +78: vcvttph2udq Vx,Wx (ev) | vcvttph2uqq Vx,Wx (66),(ev) | vcvttsh2usi Vx,Wx (F3),(ev) +79: vcvtph2udq Vx,Wx (ev) | vcvtph2uqq Vx,Wx (66),(ev) | vcvtsh2usi Vx,Wx (F3),(ev) +7a: vcvttph2qq Vx,Wx (66),(ev) | vcvtudq2ph Vx,Wx (F2),(ev) | vcvtuqq2ph Vx,Wx (F2),(ev) +7b: vcvtph2qq Vx,Wx (66),(ev) | vcvtusi2sh Vx,Hx,Wx (F3),(ev) +7c: vcvttph2uw Vx,Wx (ev) | vcvttph2w Vx,Wx (66),(ev) +7d: vcvtph2uw Vx,Wx (ev) | vcvtph2w Vx,Wx (66),(ev) | vcvtuw2ph Vx,Wx (F2),(ev) | vcvtw2ph Vx,Wx (F3),(ev) +7e: vmovw Wx,Vx (66),(ev) +EndTable + +Table: EVEX map 6 +Referrer: +AVXcode: 6 +13: vcvtph2psx Vx,Wx (66),(ev) | vcvtsh2ss Vx,Hx,Wx (ev) +2c: vscalefph Vx,Hx,Wx (66),(ev) +2d: vscalefsh Vx,Hx,Wx (66),(ev) +42: vgetexpph Vx,Wx (66),(ev) +43: vgetexpsh Vx,Hx,Wx (66),(ev) +4c: vrcpph Vx,Wx (66),(ev) +4d: vrcpsh Vx,Hx,Wx (66),(ev) +4e: vrsqrtph Vx,Wx (66),(ev) +4f: vrsqrtsh Vx,Hx,Wx (66),(ev) +56: vfcmaddcph Vx,Hx,Wx (F2),(ev) | vfmaddcph Vx,Hx,Wx (F3),(ev) +57: vfcmaddcsh Vx,Hx,Wx (F2),(ev) | vfmaddcsh Vx,Hx,Wx (F3),(ev) +96: vfmaddsub132ph Vx,Hx,Wx (66),(ev) +97: vfmsubadd132ph Vx,Hx,Wx (66),(ev) +98: 
vfmadd132ph Vx,Hx,Wx (66),(ev) +99: vfmadd132sh Vx,Hx,Wx (66),(ev) +9a: vfmsub132ph Vx,Hx,Wx (66),(ev) +9b: vfmsub132sh Vx,Hx,Wx (66),(ev) +9c: vfnmadd132ph Vx,Hx,Wx (66),(ev) +9d: vfnmadd132sh Vx,Hx,Wx (66),(ev) +9e: vfnmsub132ph Vx,Hx,Wx (66),(ev) +9f: vfnmsub132sh Vx,Hx,Wx (66),(ev) +a6: vfmaddsub213ph Vx,Hx,Wx (66),(ev) +a7: vfmsubadd213ph Vx,Hx,Wx (66),(ev) +a8: vfmadd213ph Vx,Hx,Wx (66),(ev) +a9: vfmadd213sh Vx,Hx,Wx (66),(ev) +aa: vfmsub213ph Vx,Hx,Wx (66),(ev) +ab: vfmsub213sh Vx,Hx,Wx (66),(ev) +ac: vfnmadd213ph Vx,Hx,Wx (66),(ev) +ad: vfnmadd213sh Vx,Hx,Wx (66),(ev) +ae: vfnmsub213ph Vx,Hx,Wx (66),(ev) +af: vfnmsub213sh Vx,Hx,Wx (66),(ev) +b6: vfmaddsub231ph Vx,Hx,Wx (66),(ev) +b7: vfmsubadd231ph Vx,Hx,Wx (66),(ev) +b8: vfmadd231ph Vx,Hx,Wx (66),(ev) +b9: vfmadd231sh Vx,Hx,Wx (66),(ev) +ba: vfmsub231ph Vx,Hx,Wx (66),(ev) +bb: vfmsub231sh Vx,Hx,Wx (66),(ev) +bc: vfnmadd231ph Vx,Hx,Wx (66),(ev) +bd: vfnmadd231sh Vx,Hx,Wx (66),(ev) +be: vfnmsub231ph Vx,Hx,Wx (66),(ev) +bf: vfnmsub231sh Vx,Hx,Wx (66),(ev) +d6: vfcmulcph Vx,Hx,Wx (F2),(ev) | vfmulcph Vx,Hx,Wx (F3),(ev) +d7: vfcmulcsh Vx,Hx,Wx (F2),(ev) | vfmulcsh Vx,Hx,Wx (F3),(ev) +EndTable + GrpTable: Grp1 0: ADD 1: OR diff --git a/tools/arch/x86/include/asm/insn.h b/tools/arch/x86/include/asm/insn.h index dc632b41f1356..65c0d9ce1e295 100644 --- a/tools/arch/x86/include/asm/insn.h +++ b/tools/arch/x86/include/asm/insn.h @@ -124,7 +124,7 @@ struct insn { #define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ #define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ /* VEX bit fields */ -#define X86_EVEX_M(vex) ((vex) & 0x03) /* EVEX Byte1 */ +#define X86_EVEX_M(vex) ((vex) & 0x07) /* EVEX Byte1 */ #define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ #define X86_VEX2_M 1 /* VEX2.M always 1 */ #define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt index 591797a931bfc..d12d1358f96d2 100644 
--- a/tools/arch/x86/lib/x86-opcode-map.txt +++ b/tools/arch/x86/lib/x86-opcode-map.txt @@ -828,9 +828,9 @@ AVXcode: 3 05: vpermilpd Vx,Wx,Ib (66),(v) 06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v) 07: -08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo) +08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo) | vrndscaleph Vx,Wx,Ib (evo) 09: vroundpd Vx,Wx,Ib (66) | vrndscalepd Vx,Wx,Ib (66),(evo) -0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo) +0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo) | vrndscalesh Vx,Hx,Wx,Ib (evo) 0b: vroundsd Vsd,Wsd,Ib (66),(v1) | vrndscalesd Vx,Hx,Wx,Ib (66),(evo) 0c: vblendps Vx,Hx,Wx,Ib (66) 0d: vblendpd Vx,Hx,Wx,Ib (66) @@ -852,8 +852,8 @@ AVXcode: 3 22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1) 23: vshuff32x4/64x2 Vx,Hx,Wx,Ib (66),(ev) 25: vpternlogd/q Vx,Hx,Wx,Ib (66),(ev) -26: vgetmantps/d Vx,Wx,Ib (66),(ev) -27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev) +26: vgetmantps/d Vx,Wx,Ib (66),(ev) | vgetmantph Vx,Wx,Ib (ev) +27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev) | vgetmantsh Vx,Hx,Wx,Ib (ev) 30: kshiftrb/w Vk,Uk,Ib (66),(v) 31: kshiftrd/q Vk,Uk,Ib (66),(v) 32: kshiftlb/w Vk,Uk,Ib (66),(v) @@ -877,18 +877,19 @@ AVXcode: 3 51: vrangess/d Vx,Hx,Wx,Ib (66),(ev) 54: vfixupimmps/d Vx,Hx,Wx,Ib (66),(ev) 55: vfixupimmss/d Vx,Hx,Wx,Ib (66),(ev) -56: vreduceps/d Vx,Wx,Ib (66),(ev) -57: vreducess/d Vx,Hx,Wx,Ib (66),(ev) +56: vreduceps/d Vx,Wx,Ib (66),(ev) | vreduceph Vx,Wx,Ib (ev) +57: vreducess/d Vx,Hx,Wx,Ib (66),(ev) | vreducesh Vx,Hx,Wx,Ib (ev) 60: vpcmpestrm Vdq,Wdq,Ib (66),(v1) 61: vpcmpestri Vdq,Wdq,Ib (66),(v1) 62: vpcmpistrm Vdq,Wdq,Ib (66),(v1) 63: vpcmpistri Vdq,Wdq,Ib (66),(v1) -66: vfpclassps/d Vk,Wx,Ib (66),(ev) -67: vfpclassss/d Vk,Wx,Ib (66),(ev) +66: vfpclassps/d Vk,Wx,Ib (66),(ev) | vfpclassph Vx,Wx,Ib (ev) +67: vfpclassss/d Vk,Wx,Ib (66),(ev) | vfpclasssh Vx,Wx,Ib (ev) 70: vpshldw Vx,Hx,Wx,Ib (66),(ev) 71: vpshldd/q Vx,Hx,Wx,Ib (66),(ev) 72: vpshrdw Vx,Hx,Wx,Ib (66),(ev) 73: vpshrdd/q 
Vx,Hx,Wx,Ib (66),(ev) +c2: vcmpph Vx,Hx,Wx,Ib (ev) | vcmpsh Vx,Hx,Wx,Ib (F3),(ev) cc: sha1rnds4 Vdq,Wdq,Ib ce: vgf2p8affineqb Vx,Wx,Ib (66) cf: vgf2p8affineinvqb Vx,Wx,Ib (66) @@ -896,6 +897,84 @@ df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1) f0: RORX Gy,Ey,Ib (F2),(v) | HRESET Gv,Ib (F3),(000),(11B) EndTable +Table: EVEX map 5 +Referrer: +AVXcode: 5 +10: vmovsh Vx,Hx,Wx (F3),(ev) | vmovsh Vx,Wx (F3),(ev) +11: vmovsh Wx,Hx,Vx (F3),(ev) | vmovsh Wx,Vx (F3),(ev) +1d: vcvtps2phx Vx,Wx (66),(ev) | vcvtss2sh Vx,Hx,Wx (ev) +2a: vcvtsi2sh Vx,Hx,Wx (F3),(ev) +2c: vcvttsh2si Vx,Wx (F3),(ev) +2d: vcvtsh2si Vx,Wx (F3),(ev) +2e: vucomish Vx,Wx (ev) +2f: vcomish Vx,Wx (ev) +51: vsqrtph Vx,Wx (ev) | vsqrtsh Vx,Hx,Wx (F3),(ev) +58: vaddph Vx,Hx,Wx (ev) | vaddsh Vx,Hx,Wx (F3),(ev) +59: vmulph Vx,Hx,Wx (ev) | vmulsh Vx,Hx,Wx (F3),(ev) +5a: vcvtpd2ph Vx,Wx (66),(ev) | vcvtph2pd Vx,Wx (ev) | vcvtsd2sh Vx,Hx,Wx (F2),(ev) | vcvtsh2sd Vx,Hx,Wx (F3),(ev) +5b: vcvtdq2ph Vx,Wx (ev) | vcvtph2dq Vx,Wx (66),(ev) | vcvtqq2ph Vx,Wx (ev) | vcvttph2dq Vx,Wx (F3),(ev) +5c: vsubph Vx,Hx,Wx (ev) | vsubsh Vx,Hx,Wx (F3),(ev) +5d: vminph Vx,Hx,Wx (ev) | vminsh Vx,Hx,Wx (F3),(ev) +5e: vdivph Vx,Hx,Wx (ev) | vdivsh Vx,Hx,Wx (F3),(ev) +5f: vmaxph Vx,Hx,Wx (ev) | vmaxsh Vx,Hx,Wx (F3),(ev) +6e: vmovw Vx,Wx (66),(ev) +78: vcvttph2udq Vx,Wx (ev) | vcvttph2uqq Vx,Wx (66),(ev) | vcvttsh2usi Vx,Wx (F3),(ev) +79: vcvtph2udq Vx,Wx (ev) | vcvtph2uqq Vx,Wx (66),(ev) | vcvtsh2usi Vx,Wx (F3),(ev) +7a: vcvttph2qq Vx,Wx (66),(ev) | vcvtudq2ph Vx,Wx (F2),(ev) | vcvtuqq2ph Vx,Wx (F2),(ev) +7b: vcvtph2qq Vx,Wx (66),(ev) | vcvtusi2sh Vx,Hx,Wx (F3),(ev) +7c: vcvttph2uw Vx,Wx (ev) | vcvttph2w Vx,Wx (66),(ev) +7d: vcvtph2uw Vx,Wx (ev) | vcvtph2w Vx,Wx (66),(ev) | vcvtuw2ph Vx,Wx (F2),(ev) | vcvtw2ph Vx,Wx (F3),(ev) +7e: vmovw Wx,Vx (66),(ev) +EndTable + +Table: EVEX map 6 +Referrer: +AVXcode: 6 +13: vcvtph2psx Vx,Wx (66),(ev) | vcvtsh2ss Vx,Hx,Wx (ev) +2c: vscalefph Vx,Hx,Wx (66),(ev) +2d: vscalefsh Vx,Hx,Wx (66),(ev) +42: vgetexpph 
Vx,Wx (66),(ev) +43: vgetexpsh Vx,Hx,Wx (66),(ev) +4c: vrcpph Vx,Wx (66),(ev) +4d: vrcpsh Vx,Hx,Wx (66),(ev) +4e: vrsqrtph Vx,Wx (66),(ev) +4f: vrsqrtsh Vx,Hx,Wx (66),(ev) +56: vfcmaddcph Vx,Hx,Wx (F2),(ev) | vfmaddcph Vx,Hx,Wx (F3),(ev) +57: vfcmaddcsh Vx,Hx,Wx (F2),(ev) | vfmaddcsh Vx,Hx,Wx (F3),(ev) +96: vfmaddsub132ph Vx,Hx,Wx (66),(ev) +97: vfmsubadd132ph Vx,Hx,Wx (66),(ev) +98: vfmadd132ph Vx,Hx,Wx (66),(ev) +99: vfmadd132sh Vx,Hx,Wx (66),(ev) +9a: vfmsub132ph Vx,Hx,Wx (66),(ev) +9b: vfmsub132sh Vx,Hx,Wx (66),(ev) +9c: vfnmadd132ph Vx,Hx,Wx (66),(ev) +9d: vfnmadd132sh Vx,Hx,Wx (66),(ev) +9e: vfnmsub132ph Vx,Hx,Wx (66),(ev) +9f: vfnmsub132sh Vx,Hx,Wx (66),(ev) +a6: vfmaddsub213ph Vx,Hx,Wx (66),(ev) +a7: vfmsubadd213ph Vx,Hx,Wx (66),(ev) +a8: vfmadd213ph Vx,Hx,Wx (66),(ev) +a9: vfmadd213sh Vx,Hx,Wx (66),(ev) +aa: vfmsub213ph Vx,Hx,Wx (66),(ev) +ab: vfmsub213sh Vx,Hx,Wx (66),(ev) +ac: vfnmadd213ph Vx,Hx,Wx (66),(ev) +ad: vfnmadd213sh Vx,Hx,Wx (66),(ev) +ae: vfnmsub213ph Vx,Hx,Wx (66),(ev) +af: vfnmsub213sh Vx,Hx,Wx (66),(ev) +b6: vfmaddsub231ph Vx,Hx,Wx (66),(ev) +b7: vfmsubadd231ph Vx,Hx,Wx (66),(ev) +b8: vfmadd231ph Vx,Hx,Wx (66),(ev) +b9: vfmadd231sh Vx,Hx,Wx (66),(ev) +ba: vfmsub231ph Vx,Hx,Wx (66),(ev) +bb: vfmsub231sh Vx,Hx,Wx (66),(ev) +bc: vfnmadd231ph Vx,Hx,Wx (66),(ev) +bd: vfnmadd231sh Vx,Hx,Wx (66),(ev) +be: vfnmsub231ph Vx,Hx,Wx (66),(ev) +bf: vfnmsub231sh Vx,Hx,Wx (66),(ev) +d6: vfcmulcph Vx,Hx,Wx (F2),(ev) | vfmulcph Vx,Hx,Wx (F3),(ev) +d7: vfcmulcsh Vx,Hx,Wx (F2),(ev) | vfmulcsh Vx,Hx,Wx (F3),(ev) +EndTable + GrpTable: Grp1 0: ADD 1: OR -- GitLab From 4eda2bc3431ebe5c8361580877a4666d10ea28a7 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 29 Sep 2021 16:43:21 +0200 Subject: [PATCH 0009/1586] x86/Kconfig: Select ARCH_SELECT_MEMORY_MODEL only if FLATMEM and SPARSEMEM are possible x86-64 supports only CONFIG_SPARSEMEM; there is nothing users can select. 
So enable the memory model selection (via CONFIG_ARCH_SELECT_MEMORY_MODEL) only if both SPARSEMEM and FLATMEM are possible, which isn't the case on x86-64. Signed-off-by: David Hildenbrand Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20210929144321.50411-1-david@redhat.com --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ebe8fc76949af..a7e0bf47f2dbb 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1637,7 +1637,7 @@ config ARCH_SPARSEMEM_DEFAULT config ARCH_SELECT_MEMORY_MODEL def_bool y - depends on ARCH_SPARSEMEM_ENABLE + depends on ARCH_SPARSEMEM_ENABLE && ARCH_FLATMEM_ENABLE config ARCH_MEMORY_PROBE bool "Enable sysfs memory/probe interface" -- GitLab From 1056c41634d400cf88d30580dc53270030cb0ace Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Thu, 13 Jan 2022 15:42:59 +0100 Subject: [PATCH 0010/1586] regmap-irq: Fix typo in comment This is not a resource manager, it is a "Resource managed version of regmap_add_irq_chip()". Fix the comment. Signed-off-by: Luca Ceresoli Link: https://lore.kernel.org/r/20220113144259.355845-1-luca@lucaceresoli.net Signed-off-by: Mark Brown --- drivers/base/regmap/regmap-irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c index d2656581a6085..d5604f4972967 100644 --- a/drivers/base/regmap/regmap-irq.c +++ b/drivers/base/regmap/regmap-irq.c @@ -1053,7 +1053,7 @@ int devm_regmap_add_irq_chip_fwnode(struct device *dev, EXPORT_SYMBOL_GPL(devm_regmap_add_irq_chip_fwnode); /** - * devm_regmap_add_irq_chip() - Resource manager regmap_add_irq_chip() + * devm_regmap_add_irq_chip() - Resource managed regmap_add_irq_chip() * * @dev: The device pointer on which irq_chip belongs to. * @map: The regmap for the device.
-- GitLab From 6390d42c21efff0b4c10956a38e341f4e84ecd3d Mon Sep 17 00:00:00 2001 From: kernel test robot Date: Sat, 15 Jan 2022 12:11:38 +0100 Subject: [PATCH 0011/1586] regulator: qcom_smd: fix for_each_child.cocci warnings drivers/regulator/qcom_smd-regulator.c:1318:1-33: WARNING: Function "for_each_available_child_of_node" should have of_node_put() before return around line 1321. Semantic patch information: False positives can be due to function calls within the for_each loop that may encapsulate an of_node_put. Generated by: scripts/coccinelle/iterators/for_each_child.cocci Fixes: 14e2976fbabd ("regulator: qcom_smd: Align probe function with rpmh-regulator") CC: Konrad Dybcio Reported-by: kernel test robot Signed-off-by: kernel test robot Signed-off-by: Julia Lawall Link: https://lore.kernel.org/r/alpine.DEB.2.22.394.2201151210170.3051@hadrien Signed-off-by: Mark Brown --- drivers/regulator/qcom_smd-regulator.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/regulator/qcom_smd-regulator.c b/drivers/regulator/qcom_smd-regulator.c index 9fc666107a06c..8490aa8eecb1a 100644 --- a/drivers/regulator/qcom_smd-regulator.c +++ b/drivers/regulator/qcom_smd-regulator.c @@ -1317,8 +1317,10 @@ static int rpm_reg_probe(struct platform_device *pdev) for_each_available_child_of_node(dev->of_node, node) { vreg = devm_kzalloc(&pdev->dev, sizeof(*vreg), GFP_KERNEL); - if (!vreg) + if (!vreg) { + of_node_put(node); return -ENOMEM; + } ret = rpm_regulator_init_vreg(vreg, dev, node, rpm, vreg_data); -- GitLab From 8e9977e48c7c2a49e09859456dcba12a8d804a51 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 19 Jan 2022 11:37:47 +0100 Subject: [PATCH 0012/1586] regulator: pfuze100: Add missing regulator names The valid regulator names for the subnodes do not match the ones mentioned in the description. PFUZE3000 & PFUZE3001 use 'v33', 'vccsd' and 'vldo[1-4]' as well, so add them to the allowed node names.
Signed-off-by: Alexander Stein Link: https://lore.kernel.org/r/20220119103747.58305-1-alexander.stein@ew.tq-group.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/regulator/pfuze100.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/regulator/pfuze100.yaml b/Documentation/devicetree/bindings/regulator/pfuze100.yaml index f578e72778a7c..a26bbd68b7296 100644 --- a/Documentation/devicetree/bindings/regulator/pfuze100.yaml +++ b/Documentation/devicetree/bindings/regulator/pfuze100.yaml @@ -70,7 +70,11 @@ properties: $ref: "regulator.yaml#" type: object - "^(vsnvs|vref|vrefddr|swbst|coin)$": + "^vldo[1-4]$": + $ref: "regulator.yaml#" + type: object + + "^(vsnvs|vref|vrefddr|swbst|coin|v33|vccsd)$": $ref: "regulator.yaml#" type: object -- GitLab From 2da187304e556ac59cf2dacb323cc78ded988169 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Mon, 17 Jan 2022 11:07:47 +0000 Subject: [PATCH 0013/1586] spi: add bindings for microchip mpfs spi Add device tree bindings for the {q,}spi controller on the Microchip PolarFire SoC. 
Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20220117110755.3433142-7-conor.dooley@microchip.com Signed-off-by: Mark Brown --- .../bindings/spi/microchip,mpfs-spi.yaml | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 Documentation/devicetree/bindings/spi/microchip,mpfs-spi.yaml diff --git a/Documentation/devicetree/bindings/spi/microchip,mpfs-spi.yaml b/Documentation/devicetree/bindings/spi/microchip,mpfs-spi.yaml new file mode 100644 index 0000000000000..ece261b8e963f --- /dev/null +++ b/Documentation/devicetree/bindings/spi/microchip,mpfs-spi.yaml @@ -0,0 +1,52 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/spi/microchip,mpfs-spi.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Microchip MPFS {Q,}SPI Controller Device Tree Bindings + +maintainers: + - Conor Dooley + +allOf: + - $ref: spi-controller.yaml# + +properties: + compatible: + enum: + - microchip,mpfs-spi + - microchip,mpfs-qspi + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clock-names: + maxItems: 1 + + clocks: + maxItems: 1 + +required: + - compatible + - reg + - interrupts + - clocks + +unevaluatedProperties: false + +examples: + - | + #include "dt-bindings/clock/microchip,mpfs-clock.h" + spi@20108000 { + compatible = "microchip,mpfs-spi"; + reg = <0x20108000 0x1000>; + clocks = <&clkcfg CLK_SPI0>; + interrupt-parent = <&plic>; + interrupts = <54>; + spi-max-frequency = <25000000>; + }; +... -- GitLab From 5b177234e9fde7d4208e8163debc109b86e3f68d Mon Sep 17 00:00:00 2001 From: Guochun Mao Date: Tue, 18 Jan 2022 22:28:18 +0800 Subject: [PATCH 0014/1586] spi: spi-mtk-nor: improve device table for adding more capabilities Define a structure for adding more capabilities. Add a item extra_dummy_bit for new SoCs, due to design changed. 
Signed-off-by: Guochun Mao Signed-off-by: Zhen Zhang Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20220118142820.2729-3-guochun.mao@mediatek.com Signed-off-by: Mark Brown --- drivers/spi/spi-mtk-nor.c | 48 +++++++++++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 9 deletions(-) diff --git a/drivers/spi/spi-mtk-nor.c b/drivers/spi/spi-mtk-nor.c index 5c93730615f8d..f5ff01f61f42a 100644 --- a/drivers/spi/spi-mtk-nor.c +++ b/drivers/spi/spi-mtk-nor.c @@ -95,6 +95,17 @@ #define CLK_TO_US(sp, clkcnt) DIV_ROUND_UP(clkcnt, sp->spi_freq / 1000000) +struct mtk_nor_caps { + u8 dma_bits; + + /* extra_dummy_bit is adding for the IP of new SoCs. + * Some new SoCs modify the timing of fetching registers' values + * and IDs of nor flash, they need a extra_dummy_bit which can add + * more clock cycles for fetching data. + */ + u8 extra_dummy_bit; +}; + struct mtk_nor { struct spi_controller *ctlr; struct device *dev; @@ -109,6 +120,7 @@ struct mtk_nor { bool has_irq; bool high_dma; struct completion op_done; + const struct mtk_nor_caps *caps; }; static inline void mtk_nor_rmw(struct mtk_nor *sp, u32 reg, u32 set, u32 clr) @@ -554,7 +566,12 @@ static int mtk_nor_spi_mem_prg(struct mtk_nor *sp, const struct spi_mem_op *op) } // trigger op - writel(prg_len * BITS_PER_BYTE, sp->base + MTK_NOR_REG_PRG_CNT); + if (rx_len) + writel(prg_len * BITS_PER_BYTE + sp->caps->extra_dummy_bit, + sp->base + MTK_NOR_REG_PRG_CNT); + else + writel(prg_len * BITS_PER_BYTE, sp->base + MTK_NOR_REG_PRG_CNT); + ret = mtk_nor_cmd_exec(sp, MTK_NOR_CMD_PROGRAM, prg_len * BITS_PER_BYTE); if (ret) @@ -743,9 +760,19 @@ static const struct spi_controller_mem_ops mtk_nor_mem_ops = { .exec_op = mtk_nor_exec_op }; +const struct mtk_nor_caps mtk_nor_caps_mt8173 = { + .dma_bits = 32, + .extra_dummy_bit = 0, +}; + +const struct mtk_nor_caps mtk_nor_caps_mt8192 = { + .dma_bits = 36, + .extra_dummy_bit = 0, +}; + static const struct of_device_id mtk_nor_match[] = { - { 
.compatible = "mediatek,mt8192-nor", .data = (void *)36 }, - { .compatible = "mediatek,mt8173-nor", .data = (void *)32 }, + { .compatible = "mediatek,mt8173-nor", .data = &mtk_nor_caps_mt8173 }, + { .compatible = "mediatek,mt8192-nor", .data = &mtk_nor_caps_mt8192 }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, mtk_nor_match); @@ -754,10 +781,10 @@ static int mtk_nor_probe(struct platform_device *pdev) { struct spi_controller *ctlr; struct mtk_nor *sp; + struct mtk_nor_caps *caps; void __iomem *base; struct clk *spi_clk, *ctlr_clk, *axi_clk; int ret, irq; - unsigned long dma_bits; base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(base)) @@ -775,10 +802,12 @@ static int mtk_nor_probe(struct platform_device *pdev) if (IS_ERR(axi_clk)) return PTR_ERR(axi_clk); - dma_bits = (unsigned long)of_device_get_match_data(&pdev->dev); - if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(dma_bits))) { - dev_err(&pdev->dev, "failed to set dma mask(%lu)\n", dma_bits); - return -EINVAL; + caps = (struct mtk_nor_caps *)of_device_get_match_data(&pdev->dev); + + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(caps->dma_bits)); + if (ret) { + dev_err(&pdev->dev, "failed to set dma mask(%u)\n", caps->dma_bits); + return ret; } ctlr = devm_spi_alloc_master(&pdev->dev, sizeof(*sp)); @@ -808,7 +837,8 @@ static int mtk_nor_probe(struct platform_device *pdev) sp->spi_clk = spi_clk; sp->ctlr_clk = ctlr_clk; sp->axi_clk = axi_clk; - sp->high_dma = (dma_bits > 32); + sp->caps = caps; + sp->high_dma = caps->dma_bits > 32; sp->buffer = dmam_alloc_coherent(&pdev->dev, MTK_NOR_BOUNCE_BUF_SIZE + MTK_NOR_DMA_ALIGN, &sp->buffer_dma, GFP_KERNEL); -- GitLab From 4e8bfe5cdf77621cb4e7b196448ceeff20d9d6a6 Mon Sep 17 00:00:00 2001 From: Guochun Mao Date: Tue, 18 Jan 2022 22:28:19 +0800 Subject: [PATCH 0015/1586] spi: spi-mtk-nor: add new soc mt8186 support Add compatible mediatek,mt8186-nor implementation. 
Signed-off-by: Guochun Mao Signed-off-by: Zhen Zhang Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20220118142820.2729-4-guochun.mao@mediatek.com Signed-off-by: Mark Brown --- drivers/spi/spi-mtk-nor.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/spi/spi-mtk-nor.c b/drivers/spi/spi-mtk-nor.c index f5ff01f61f42a..e44fdf7c9e4ba 100644 --- a/drivers/spi/spi-mtk-nor.c +++ b/drivers/spi/spi-mtk-nor.c @@ -765,6 +765,11 @@ const struct mtk_nor_caps mtk_nor_caps_mt8173 = { .extra_dummy_bit = 0, }; +const struct mtk_nor_caps mtk_nor_caps_mt8186 = { + .dma_bits = 32, + .extra_dummy_bit = 1, +}; + const struct mtk_nor_caps mtk_nor_caps_mt8192 = { .dma_bits = 36, .extra_dummy_bit = 0, @@ -772,6 +777,7 @@ const struct mtk_nor_caps mtk_nor_caps_mt8192 = { static const struct of_device_id mtk_nor_match[] = { { .compatible = "mediatek,mt8173-nor", .data = &mtk_nor_caps_mt8173 }, + { .compatible = "mediatek,mt8186-nor", .data = &mtk_nor_caps_mt8186 }, { .compatible = "mediatek,mt8192-nor", .data = &mtk_nor_caps_mt8192 }, { /* sentinel */ } }; -- GitLab From 58b0a653b8dac40bbeb01a2c8a230aa8f84a7530 Mon Sep 17 00:00:00 2001 From: Guochun Mao Date: Tue, 18 Jan 2022 22:28:20 +0800 Subject: [PATCH 0016/1586] spi: spi-mtk-nor: add axi_s clock for mt8186 MT8186 needs axi_s clock for DMA feature. 
Signed-off-by: Guochun Mao Signed-off-by: Zhen Zhang Acked-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20220118142820.2729-5-guochun.mao@mediatek.com Signed-off-by: Mark Brown --- drivers/spi/spi-mtk-nor.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-mtk-nor.c b/drivers/spi/spi-mtk-nor.c index e44fdf7c9e4ba..455b4dcb26e9a 100644 --- a/drivers/spi/spi-mtk-nor.c +++ b/drivers/spi/spi-mtk-nor.c @@ -115,6 +115,7 @@ struct mtk_nor { struct clk *spi_clk; struct clk *ctlr_clk; struct clk *axi_clk; + struct clk *axi_s_clk; unsigned int spi_freq; bool wbuf_en; bool has_irq; @@ -691,6 +692,7 @@ static void mtk_nor_disable_clk(struct mtk_nor *sp) clk_disable_unprepare(sp->spi_clk); clk_disable_unprepare(sp->ctlr_clk); clk_disable_unprepare(sp->axi_clk); + clk_disable_unprepare(sp->axi_s_clk); } static int mtk_nor_enable_clk(struct mtk_nor *sp) @@ -714,6 +716,14 @@ static int mtk_nor_enable_clk(struct mtk_nor *sp) return ret; } + ret = clk_prepare_enable(sp->axi_s_clk); + if (ret) { + clk_disable_unprepare(sp->spi_clk); + clk_disable_unprepare(sp->ctlr_clk); + clk_disable_unprepare(sp->axi_clk); + return ret; + } + return 0; } @@ -789,7 +799,7 @@ static int mtk_nor_probe(struct platform_device *pdev) struct mtk_nor *sp; struct mtk_nor_caps *caps; void __iomem *base; - struct clk *spi_clk, *ctlr_clk, *axi_clk; + struct clk *spi_clk, *ctlr_clk, *axi_clk, *axi_s_clk; int ret, irq; base = devm_platform_ioremap_resource(pdev, 0); @@ -808,6 +818,10 @@ static int mtk_nor_probe(struct platform_device *pdev) if (IS_ERR(axi_clk)) return PTR_ERR(axi_clk); + axi_s_clk = devm_clk_get_optional(&pdev->dev, "axi_s"); + if (IS_ERR(axi_s_clk)) + return PTR_ERR(axi_s_clk); + caps = (struct mtk_nor_caps *)of_device_get_match_data(&pdev->dev); ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(caps->dma_bits)); @@ -843,6 +857,7 @@ static int mtk_nor_probe(struct platform_device *pdev) sp->spi_clk = spi_clk; 
sp->ctlr_clk = ctlr_clk; sp->axi_clk = axi_clk; + sp->axi_s_clk = axi_s_clk; sp->caps = caps; sp->high_dma = caps->dma_bits > 32; sp->buffer = dmam_alloc_coherent(&pdev->dev, -- GitLab From ceab11a3c0d620d9ec2c032fd8014615cf7934ec Mon Sep 17 00:00:00 2001 From: Guochun Mao Date: Tue, 18 Jan 2022 22:28:17 +0800 Subject: [PATCH 0017/1586] spi: add mt8186-nor compatible string Add MT8186 spi-nor controller support. MT8186 needs a new clock name, axi_s, for spi nor axi slave bus clock. Signed-off-by: Guochun Mao Signed-off-by: Zhen Zhang Link: https://lore.kernel.org/r/20220118142820.2729-2-guochun.mao@mediatek.com Signed-off-by: Mark Brown --- .../devicetree/bindings/spi/mediatek,spi-mtk-nor.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/spi/mediatek,spi-mtk-nor.yaml b/Documentation/devicetree/bindings/spi/mediatek,spi-mtk-nor.yaml index 4e4694e3d5390..be3cc7faed534 100644 --- a/Documentation/devicetree/bindings/spi/mediatek,spi-mtk-nor.yaml +++ b/Documentation/devicetree/bindings/spi/mediatek,spi-mtk-nor.yaml @@ -30,6 +30,7 @@ properties: - mediatek,mt7622-nor - mediatek,mt7623-nor - mediatek,mt7629-nor + - mediatek,mt8186-nor - mediatek,mt8192-nor - mediatek,mt8195-nor - enum: @@ -49,6 +50,8 @@ properties: - description: clock used for controller - description: clock used for nor dma bus. this depends on hardware design, so this is optional. + - description: clock used for controller axi slave bus. + this depends on hardware design, so it is optional. clock-names: minItems: 2 @@ -56,6 +59,7 @@ properties: - const: spi - const: sf - const: axi + - const: axi_s required: - compatible -- GitLab From f1ba938e4f98941dc2b77795062e49444ec1fee1 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 19 Jan 2022 00:09:13 +0100 Subject: [PATCH 0018/1586] spi: s3c64xx: Delete unused boardfile helpers The helpers to use SPI host 1 and 2 are unused in the kernel and taking up space and maintenance hours. 
New systems should use device tree and not this, so delete the code. Cc: linux-samsung-soc@vger.kernel.org Cc: Krzysztof Kozlowski Cc: Sylwester Nawrocki Signed-off-by: Linus Walleij Reviewed-by: Sam Protsenko Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220118230915.157797-1-linus.walleij@linaro.org Signed-off-by: Mark Brown --- arch/arm/mach-s3c/Kconfig | 12 ---- arch/arm/mach-s3c/devs.c | 72 ----------------------- arch/arm/mach-s3c/setup-spi-s3c64xx.c | 9 --- arch/arm/mach-s3c/spi-core-s3c24xx.h | 6 -- include/linux/platform_data/spi-s3c64xx.h | 8 --- 5 files changed, 107 deletions(-) diff --git a/arch/arm/mach-s3c/Kconfig b/arch/arm/mach-s3c/Kconfig index 25606e668cf93..1899fc3f44fd7 100644 --- a/arch/arm/mach-s3c/Kconfig +++ b/arch/arm/mach-s3c/Kconfig @@ -191,18 +191,6 @@ config S3C64XX_DEV_SPI0 Compile in platform device definitions for S3C64XX's type SPI controller 0 -config S3C64XX_DEV_SPI1 - bool - help - Compile in platform device definitions for S3C64XX's type - SPI controller 1 - -config S3C64XX_DEV_SPI2 - bool - help - Compile in platform device definitions for S3C64XX's type - SPI controller 2 - config SAMSUNG_DEV_TS bool help diff --git a/arch/arm/mach-s3c/devs.c b/arch/arm/mach-s3c/devs.c index 06dec64848f9a..9f086aee862bc 100644 --- a/arch/arm/mach-s3c/devs.c +++ b/arch/arm/mach-s3c/devs.c @@ -1125,75 +1125,3 @@ void __init s3c64xx_spi0_set_platdata(int (*cfg_gpio)(void), int src_clk_nr, s3c_set_platdata(&pd, sizeof(pd), &s3c64xx_device_spi0); } #endif /* CONFIG_S3C64XX_DEV_SPI0 */ - -#ifdef CONFIG_S3C64XX_DEV_SPI1 -static struct resource s3c64xx_spi1_resource[] = { - [0] = DEFINE_RES_MEM(S3C_PA_SPI1, SZ_256), - [1] = DEFINE_RES_IRQ(IRQ_SPI1), -}; - -struct platform_device s3c64xx_device_spi1 = { - .name = "s3c6410-spi", - .id = 1, - .num_resources = ARRAY_SIZE(s3c64xx_spi1_resource), - .resource = s3c64xx_spi1_resource, - .dev = { - .dma_mask = &samsung_device_dma_mask, - .coherent_dma_mask = DMA_BIT_MASK(32), - }, -}; - 
-void __init s3c64xx_spi1_set_platdata(int (*cfg_gpio)(void), int src_clk_nr, - int num_cs) -{ - struct s3c64xx_spi_info pd; - - /* Reject invalid configuration */ - if (!num_cs || src_clk_nr < 0) { - pr_err("%s: Invalid SPI configuration\n", __func__); - return; - } - - pd.num_cs = num_cs; - pd.src_clk_nr = src_clk_nr; - pd.cfg_gpio = (cfg_gpio) ? cfg_gpio : s3c64xx_spi1_cfg_gpio; - - s3c_set_platdata(&pd, sizeof(pd), &s3c64xx_device_spi1); -} -#endif /* CONFIG_S3C64XX_DEV_SPI1 */ - -#ifdef CONFIG_S3C64XX_DEV_SPI2 -static struct resource s3c64xx_spi2_resource[] = { - [0] = DEFINE_RES_MEM(S3C_PA_SPI2, SZ_256), - [1] = DEFINE_RES_IRQ(IRQ_SPI2), -}; - -struct platform_device s3c64xx_device_spi2 = { - .name = "s3c6410-spi", - .id = 2, - .num_resources = ARRAY_SIZE(s3c64xx_spi2_resource), - .resource = s3c64xx_spi2_resource, - .dev = { - .dma_mask = &samsung_device_dma_mask, - .coherent_dma_mask = DMA_BIT_MASK(32), - }, -}; - -void __init s3c64xx_spi2_set_platdata(int (*cfg_gpio)(void), int src_clk_nr, - int num_cs) -{ - struct s3c64xx_spi_info pd; - - /* Reject invalid configuration */ - if (!num_cs || src_clk_nr < 0) { - pr_err("%s: Invalid SPI configuration\n", __func__); - return; - } - - pd.num_cs = num_cs; - pd.src_clk_nr = src_clk_nr; - pd.cfg_gpio = (cfg_gpio) ? 
cfg_gpio : s3c64xx_spi2_cfg_gpio; - - s3c_set_platdata(&pd, sizeof(pd), &s3c64xx_device_spi2); -} -#endif /* CONFIG_S3C64XX_DEV_SPI2 */ diff --git a/arch/arm/mach-s3c/setup-spi-s3c64xx.c b/arch/arm/mach-s3c/setup-spi-s3c64xx.c index efcf78d415853..497aff71c29cc 100644 --- a/arch/arm/mach-s3c/setup-spi-s3c64xx.c +++ b/arch/arm/mach-s3c/setup-spi-s3c64xx.c @@ -16,12 +16,3 @@ int s3c64xx_spi0_cfg_gpio(void) return 0; } #endif - -#ifdef CONFIG_S3C64XX_DEV_SPI1 -int s3c64xx_spi1_cfg_gpio(void) -{ - s3c_gpio_cfgall_range(S3C64XX_GPC(4), 3, - S3C_GPIO_SFN(2), S3C_GPIO_PULL_UP); - return 0; -} -#endif diff --git a/arch/arm/mach-s3c/spi-core-s3c24xx.h b/arch/arm/mach-s3c/spi-core-s3c24xx.h index 057667469cc3d..919c5fd0c9afa 100644 --- a/arch/arm/mach-s3c/spi-core-s3c24xx.h +++ b/arch/arm/mach-s3c/spi-core-s3c24xx.h @@ -16,12 +16,6 @@ static inline void s3c24xx_spi_setname(char *name) #ifdef CONFIG_S3C64XX_DEV_SPI0 s3c64xx_device_spi0.name = name; #endif -#ifdef CONFIG_S3C64XX_DEV_SPI1 - s3c64xx_device_spi1.name = name; -#endif -#ifdef CONFIG_S3C64XX_DEV_SPI2 - s3c64xx_device_spi2.name = name; -#endif } #endif /* __PLAT_S3C_SPI_CORE_S3C24XX_H */ diff --git a/include/linux/platform_data/spi-s3c64xx.h b/include/linux/platform_data/spi-s3c64xx.h index 773daf7915a37..19d690f34670f 100644 --- a/include/linux/platform_data/spi-s3c64xx.h +++ b/include/linux/platform_data/spi-s3c64xx.h @@ -52,17 +52,9 @@ struct s3c64xx_spi_info { */ extern void s3c64xx_spi0_set_platdata(int (*cfg_gpio)(void), int src_clk_nr, int num_cs); -extern void s3c64xx_spi1_set_platdata(int (*cfg_gpio)(void), int src_clk_nr, - int num_cs); -extern void s3c64xx_spi2_set_platdata(int (*cfg_gpio)(void), int src_clk_nr, - int num_cs); /* defined by architecture to configure gpio */ extern int s3c64xx_spi0_cfg_gpio(void); -extern int s3c64xx_spi1_cfg_gpio(void); -extern int s3c64xx_spi2_cfg_gpio(void); extern struct s3c64xx_spi_info s3c64xx_spi0_pdata; -extern struct s3c64xx_spi_info s3c64xx_spi1_pdata; -extern 
struct s3c64xx_spi_info s3c64xx_spi2_pdata; #endif /*__SPI_S3C64XX_H */ -- GitLab From 3b5529ae7f3578da633e8ae2ec0715a55a248f9f Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 19 Jan 2022 00:09:14 +0100 Subject: [PATCH 0019/1586] spi: s3c64xx: Drop custom gpio setup argument The SPI0 platform population function was taking a custom gpio setup callback, but the only user passes NULL as the argument, so drop this argument. Cc: linux-samsung-soc@vger.kernel.org Cc: Krzysztof Kozlowski Cc: Sylwester Nawrocki Signed-off-by: Linus Walleij Reviewed-by: Krzysztof Kozlowski Reviewed-by: Sam Protsenko Link: https://lore.kernel.org/r/20220118230915.157797-2-linus.walleij@linaro.org Signed-off-by: Mark Brown --- arch/arm/mach-s3c/devs.c | 5 ++--- arch/arm/mach-s3c/mach-crag6410.c | 2 +- include/linux/platform_data/spi-s3c64xx.h | 4 +--- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/arch/arm/mach-s3c/devs.c b/arch/arm/mach-s3c/devs.c index 9f086aee862bc..1e266fc24f9b7 100644 --- a/arch/arm/mach-s3c/devs.c +++ b/arch/arm/mach-s3c/devs.c @@ -1107,8 +1107,7 @@ struct platform_device s3c64xx_device_spi0 = { }, }; -void __init s3c64xx_spi0_set_platdata(int (*cfg_gpio)(void), int src_clk_nr, - int num_cs) +void __init s3c64xx_spi0_set_platdata(int src_clk_nr, int num_cs) { struct s3c64xx_spi_info pd; @@ -1120,7 +1119,7 @@ void __init s3c64xx_spi0_set_platdata(int (*cfg_gpio)(void), int src_clk_nr, pd.num_cs = num_cs; pd.src_clk_nr = src_clk_nr; - pd.cfg_gpio = (cfg_gpio) ? 
cfg_gpio : s3c64xx_spi0_cfg_gpio; + pd.cfg_gpio = s3c64xx_spi0_cfg_gpio; s3c_set_platdata(&pd, sizeof(pd), &s3c64xx_device_spi0); } diff --git a/arch/arm/mach-s3c/mach-crag6410.c b/arch/arm/mach-s3c/mach-crag6410.c index 4a12c75d407fc..41f0aba2d2fd0 100644 --- a/arch/arm/mach-s3c/mach-crag6410.c +++ b/arch/arm/mach-s3c/mach-crag6410.c @@ -856,7 +856,7 @@ static void __init crag6410_machine_init(void) i2c_register_board_info(1, i2c_devs1, ARRAY_SIZE(i2c_devs1)); samsung_keypad_set_platdata(&crag6410_keypad_data); - s3c64xx_spi0_set_platdata(NULL, 0, 2); + s3c64xx_spi0_set_platdata(0, 2); pwm_add_table(crag6410_pwm_lookup, ARRAY_SIZE(crag6410_pwm_lookup)); platform_add_devices(crag6410_devices, ARRAY_SIZE(crag6410_devices)); diff --git a/include/linux/platform_data/spi-s3c64xx.h b/include/linux/platform_data/spi-s3c64xx.h index 19d690f34670f..10890a4b55b9a 100644 --- a/include/linux/platform_data/spi-s3c64xx.h +++ b/include/linux/platform_data/spi-s3c64xx.h @@ -43,15 +43,13 @@ struct s3c64xx_spi_info { /** * s3c64xx_spi_set_platdata - SPI Controller configure callback by the board * initialization code. - * @cfg_gpio: Pointer to gpio setup function. * @src_clk_nr: Clock the SPI controller is to use to generate SPI clocks. * @num_cs: Number of elements in the 'cs' array. * * Call this from machine init code for each SPI Controller that * has some chips attached to it. */ -extern void s3c64xx_spi0_set_platdata(int (*cfg_gpio)(void), int src_clk_nr, - int num_cs); +extern void s3c64xx_spi0_set_platdata(int src_clk_nr, int num_cs); /* defined by architecture to configure gpio */ extern int s3c64xx_spi0_cfg_gpio(void); -- GitLab From a45cf3cc72dd9cfde9db8af32cdf9c431f53f9bc Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 19 Jan 2022 00:09:15 +0100 Subject: [PATCH 0020/1586] spi: s3c64xx: Convert to use GPIO descriptors Convert the S3C64xx SPI host to use GPIO descriptors. Provide GPIO descriptor tables for the one user with CS 0 and 1. 
Cc: linux-samsung-soc@vger.kernel.org Cc: Sylwester Nawrocki Reviewed-by: Krzysztof Kozlowski Reviewed-by: Sam Protsenko Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20220118230915.157797-3-linus.walleij@linaro.org Signed-off-by: Mark Brown --- arch/arm/mach-s3c/mach-crag6410-module.c | 13 ------ arch/arm/mach-s3c/mach-crag6410.c | 11 +++++ drivers/spi/spi-s3c64xx.c | 53 ++++++----------------- include/linux/platform_data/spi-s3c64xx.h | 2 - 4 files changed, 24 insertions(+), 55 deletions(-) diff --git a/arch/arm/mach-s3c/mach-crag6410-module.c b/arch/arm/mach-s3c/mach-crag6410-module.c index 407ad493493e3..5d1d4b67a4b77 100644 --- a/arch/arm/mach-s3c/mach-crag6410-module.c +++ b/arch/arm/mach-s3c/mach-crag6410-module.c @@ -32,10 +32,6 @@ #include "crag6410.h" -static struct s3c64xx_spi_csinfo wm0010_spi_csinfo = { - .line = S3C64XX_GPC(3), -}; - static struct wm0010_pdata wm0010_pdata = { .gpio_reset = S3C64XX_GPN(6), .reset_active_high = 1, /* Active high for Glenfarclas Rev 2 */ @@ -49,7 +45,6 @@ static struct spi_board_info wm1253_devs[] = { .chip_select = 0, .mode = SPI_MODE_0, .irq = S3C_EINT(4), - .controller_data = &wm0010_spi_csinfo, .platform_data = &wm0010_pdata, }, }; @@ -62,7 +57,6 @@ static struct spi_board_info balblair_devs[] = { .chip_select = 0, .mode = SPI_MODE_0, .irq = S3C_EINT(4), - .controller_data = &wm0010_spi_csinfo, .platform_data = &wm0010_pdata, }, }; @@ -229,10 +223,6 @@ static struct arizona_pdata wm5102_reva_pdata = { }, }; -static struct s3c64xx_spi_csinfo codec_spi_csinfo = { - .line = S3C64XX_GPN(5), -}; - static struct spi_board_info wm5102_reva_spi_devs[] = { [0] = { .modalias = "wm5102", @@ -242,7 +232,6 @@ static struct spi_board_info wm5102_reva_spi_devs[] = { .mode = SPI_MODE_0, .irq = GLENFARCLAS_PMIC_IRQ_BASE + WM831X_IRQ_GPIO_2, - .controller_data = &codec_spi_csinfo, .platform_data = &wm5102_reva_pdata, }, }; @@ -275,7 +264,6 @@ static struct spi_board_info wm5102_spi_devs[] = { .mode = SPI_MODE_0, .irq = 
GLENFARCLAS_PMIC_IRQ_BASE + WM831X_IRQ_GPIO_2, - .controller_data = &codec_spi_csinfo, .platform_data = &wm5102_pdata, }, }; @@ -298,7 +286,6 @@ static struct spi_board_info wm5110_spi_devs[] = { .mode = SPI_MODE_0, .irq = GLENFARCLAS_PMIC_IRQ_BASE + WM831X_IRQ_GPIO_2, - .controller_data = &codec_spi_csinfo, .platform_data = &wm5102_reva_pdata, }, }; diff --git a/arch/arm/mach-s3c/mach-crag6410.c b/arch/arm/mach-s3c/mach-crag6410.c index 41f0aba2d2fd0..e3e0fe897bccb 100644 --- a/arch/arm/mach-s3c/mach-crag6410.c +++ b/arch/arm/mach-s3c/mach-crag6410.c @@ -825,6 +825,15 @@ static const struct gpio_led_platform_data gpio_leds_pdata = { static struct dwc2_hsotg_plat crag6410_hsotg_pdata; +static struct gpiod_lookup_table crag_spi0_gpiod_table = { + .dev_id = "s3c6410-spi.0", + .table = { + GPIO_LOOKUP_IDX("GPIOC", 3, "cs", 0, GPIO_ACTIVE_LOW), + GPIO_LOOKUP_IDX("GPION", 5, "cs", 1, GPIO_ACTIVE_LOW), + { }, + }, +}; + static void __init crag6410_machine_init(void) { /* Open drain IRQs need pullups */ @@ -856,6 +865,8 @@ static void __init crag6410_machine_init(void) i2c_register_board_info(1, i2c_devs1, ARRAY_SIZE(i2c_devs1)); samsung_keypad_set_platdata(&crag6410_keypad_data); + + gpiod_add_lookup_table(&crag_spi0_gpiod_table); s3c64xx_spi0_set_platdata(0, 2); pwm_add_table(crag6410_pwm_lookup, ARRAY_SIZE(crag6410_pwm_lookup)); diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c index 8755cd85e83ce..3e42cdb19d272 100644 --- a/drivers/spi/spi-s3c64xx.c +++ b/drivers/spi/spi-s3c64xx.c @@ -13,10 +13,8 @@ #include #include #include -#include #include #include -#include #include @@ -656,7 +654,11 @@ static int s3c64xx_spi_prepare_message(struct spi_master *master, struct s3c64xx_spi_csinfo *cs = spi->controller_data; /* Configure feedback delay */ - writel(cs->fb_delay & 0x3, sdd->regs + S3C64XX_SPI_FB_CLK); + if (!cs) + /* No delay if not defined */ + writel(0, sdd->regs + S3C64XX_SPI_FB_CLK); + else + writel(cs->fb_delay & 0x3, sdd->regs + 
S3C64XX_SPI_FB_CLK); return 0; } @@ -830,34 +832,16 @@ static int s3c64xx_spi_setup(struct spi_device *spi) if (spi->dev.of_node) { cs = s3c64xx_get_slave_ctrldata(spi); spi->controller_data = cs; - } else if (cs) { - /* On non-DT platforms the SPI core will set spi->cs_gpio - * to -ENOENT. The GPIO pin used to drive the chip select - * is defined by using platform data so spi->cs_gpio value - * has to be override to have the proper GPIO pin number. - */ - spi->cs_gpio = cs->line; } - if (IS_ERR_OR_NULL(cs)) { + /* NULL is fine, we just avoid using the FB delay (=0) */ + if (IS_ERR(cs)) { dev_err(&spi->dev, "No CS for SPI(%d)\n", spi->chip_select); return -ENODEV; } - if (!spi_get_ctldata(spi)) { - if (gpio_is_valid(spi->cs_gpio)) { - err = gpio_request_one(spi->cs_gpio, GPIOF_OUT_INIT_HIGH, - dev_name(&spi->dev)); - if (err) { - dev_err(&spi->dev, - "Failed to get /CS gpio [%d]: %d\n", - spi->cs_gpio, err); - goto err_gpio_req; - } - } - + if (!spi_get_ctldata(spi)) spi_set_ctldata(spi, cs); - } pm_runtime_get_sync(&sdd->pdev->dev); @@ -909,11 +893,9 @@ setup_exit: /* setup() returns with device de-selected */ s3c64xx_spi_set_cs(spi, false); - if (gpio_is_valid(spi->cs_gpio)) - gpio_free(spi->cs_gpio); spi_set_ctldata(spi, NULL); -err_gpio_req: + /* This was dynamically allocated on the DT path */ if (spi->dev.of_node) kfree(cs); @@ -924,19 +906,9 @@ static void s3c64xx_spi_cleanup(struct spi_device *spi) { struct s3c64xx_spi_csinfo *cs = spi_get_ctldata(spi); - if (gpio_is_valid(spi->cs_gpio)) { - gpio_free(spi->cs_gpio); - if (spi->dev.of_node) - kfree(cs); - else { - /* On non-DT platforms, the SPI core sets - * spi->cs_gpio to -ENOENT and .setup() - * overrides it with the GPIO pin value - * passed using platform data. 
- spi->cs_gpio = -ENOENT; - } - } + /* This was dynamically allocated on the DT path */ + if (spi->dev.of_node) + kfree(cs); spi_set_ctldata(spi, NULL); } @@ -1131,6 +1103,7 @@ static int s3c64xx_spi_probe(struct platform_device *pdev) master->prepare_message = s3c64xx_spi_prepare_message; master->transfer_one = s3c64xx_spi_transfer_one; master->num_chipselect = sci->num_cs; + master->use_gpio_descriptors = true; master->dma_alignment = 8; master->bits_per_word_mask = SPI_BPW_MASK(32) | SPI_BPW_MASK(16) | SPI_BPW_MASK(8); diff --git a/include/linux/platform_data/spi-s3c64xx.h b/include/linux/platform_data/spi-s3c64xx.h index 10890a4b55b9a..5df1ace6d2c98 100644 --- a/include/linux/platform_data/spi-s3c64xx.h +++ b/include/linux/platform_data/spi-s3c64xx.h @@ -16,7 +16,6 @@ struct platform_device; * struct s3c64xx_spi_csinfo - ChipSelect description * @fb_delay: Slave specific feedback delay. * Refer to FB_CLK_SEL register definition in SPI chapter. - * @line: Custom 'identity' of the CS line. * * This is per SPI-Slave Chipselect information. * Allocate and initialize one in machine init code and make the @@ -24,7 +23,6 @@ struct platform_device; */ struct s3c64xx_spi_csinfo { u8 fb_delay; - unsigned line; }; /** -- GitLab From 7f2a3cf4e6077a1525092f114be7819e505773a1 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 19 Jan 2022 01:09:14 +0100 Subject: [PATCH 0021/1586] spi: s3c24xx: Convert to GPIO descriptors This driver has a bunch of custom old-style GPIO number-passing fields and a custom set-up callback. The good thing is: nothing in the kernel is using it. Convert the driver to use GPIO descriptors with a SPI_MASTER_GPIO_SS flag so that the local CS callback also gets invoked, as the hardware needs this. New users of this driver can provide GPIO descriptor tables like the other converted drivers. 
Cc: linux-samsung-soc@vger.kernel.org Cc: Krzysztof Kozlowski Cc: Sylwester Nawrocki Signed-off-by: Linus Walleij Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220119000914.192553-1-linus.walleij@linaro.org Signed-off-by: Mark Brown --- drivers/spi/spi-s3c24xx.c | 47 +++---------------------------------- include/linux/spi/s3c24xx.h | 5 ---- 2 files changed, 3 insertions(+), 49 deletions(-) diff --git a/drivers/spi/spi-s3c24xx.c b/drivers/spi/spi-s3c24xx.c index d6f51695ca5b2..660aa866af06f 100644 --- a/drivers/spi/spi-s3c24xx.c +++ b/drivers/spi/spi-s3c24xx.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include @@ -62,9 +61,6 @@ struct s3c24xx_spi { unsigned char fiq_inuse; unsigned char fiq_claimed; - void (*set_cs)(struct s3c2410_spi_info *spi, - int cs, int pol); - /* data buffers */ const unsigned char *tx; unsigned char *rx; @@ -84,29 +80,21 @@ static inline struct s3c24xx_spi *to_hw(struct spi_device *sdev) return spi_master_get_devdata(sdev->master); } -static void s3c24xx_spi_gpiocs(struct s3c2410_spi_info *spi, int cs, int pol) -{ - gpio_set_value(spi->pin_cs, pol); -} - static void s3c24xx_spi_chipsel(struct spi_device *spi, int value) { struct s3c24xx_spi_devstate *cs = spi->controller_state; struct s3c24xx_spi *hw = to_hw(spi); - unsigned int cspol = spi->mode & SPI_CS_HIGH ? 
1 : 0; /* change the chipselect state and the state of the spi engine clock */ switch (value) { case BITBANG_CS_INACTIVE: - hw->set_cs(hw->pdata, spi->chip_select, cspol^1); writeb(cs->spcon, hw->regs + S3C2410_SPCON); break; case BITBANG_CS_ACTIVE: writeb(cs->spcon | S3C2410_SPCON_ENSCK, hw->regs + S3C2410_SPCON); - hw->set_cs(hw->pdata, spi->chip_select, cspol); break; } } @@ -452,14 +440,6 @@ static void s3c24xx_spi_initialsetup(struct s3c24xx_spi *hw) writeb(0xff, hw->regs + S3C2410_SPPRE); writeb(SPPIN_DEFAULT, hw->regs + S3C2410_SPPIN); writeb(SPCON_DEFAULT, hw->regs + S3C2410_SPCON); - - if (hw->pdata) { - if (hw->set_cs == s3c24xx_spi_gpiocs) - gpio_direction_output(hw->pdata->pin_cs, 1); - - if (hw->pdata->gpio_setup) - hw->pdata->gpio_setup(hw->pdata, 1); - } } static int s3c24xx_spi_probe(struct platform_device *pdev) @@ -502,6 +482,9 @@ static int s3c24xx_spi_probe(struct platform_device *pdev) master->num_chipselect = hw->pdata->num_cs; master->bus_num = pdata->bus_num; master->bits_per_word_mask = SPI_BPW_MASK(8); + /* we need to call the local chipselect callback */ + master->flags = SPI_MASTER_GPIO_SS; + master->use_gpio_descriptors = true; /* setup the state for the bitbang driver */ @@ -541,27 +524,6 @@ static int s3c24xx_spi_probe(struct platform_device *pdev) goto err_no_pdata; } - /* setup any gpio we can */ - - if (!pdata->set_cs) { - if (pdata->pin_cs < 0) { - dev_err(&pdev->dev, "No chipselect pin\n"); - err = -EINVAL; - goto err_register; - } - - err = devm_gpio_request(&pdev->dev, pdata->pin_cs, - dev_name(&pdev->dev)); - if (err) { - dev_err(&pdev->dev, "Failed to get gpio for cs\n"); - goto err_register; - } - - hw->set_cs = s3c24xx_spi_gpiocs; - gpio_direction_output(pdata->pin_cs, 1); - } else - hw->set_cs = pdata->set_cs; - s3c24xx_spi_initialsetup(hw); /* register our spi controller */ @@ -604,9 +566,6 @@ static int s3c24xx_spi_suspend(struct device *dev) if (ret) return ret; - if (hw->pdata && hw->pdata->gpio_setup) - 
hw->pdata->gpio_setup(hw->pdata, 0); - clk_disable(hw->clk); return 0; } diff --git a/include/linux/spi/s3c24xx.h b/include/linux/spi/s3c24xx.h index 440a715931629..9b8bb22d5b0cf 100644 --- a/include/linux/spi/s3c24xx.h +++ b/include/linux/spi/s3c24xx.h @@ -10,14 +10,9 @@ #define __LINUX_SPI_S3C24XX_H __FILE__ struct s3c2410_spi_info { - int pin_cs; /* simple gpio cs */ unsigned int num_cs; /* total chipselects */ int bus_num; /* bus number to use. */ - unsigned int use_fiq:1; /* use fiq */ - - void (*gpio_setup)(struct s3c2410_spi_info *spi, int enable); - void (*set_cs)(struct s3c2410_spi_info *spi, int cs, int pol); }; extern int s3c24xx_set_fiq(unsigned int irq, u32 *ack_ptr, bool on); -- GitLab From 9d7c48506518684847bc17d4e3f0a103d83aa5c8 Mon Sep 17 00:00:00 2001 From: Andreas Rammhold Date: Fri, 16 Jul 2021 22:00:34 +0200 Subject: [PATCH 0022/1586] tools: cpupower: fix typo in cpupower-idle-set(1) manpage The tool's name was wrong in the SYNTAX section of the manpage; it should read "idle-set" instead of "idle-info". 
Signed-off-by: Andreas Rammhold Signed-off-by: Shuah Khan --- tools/power/cpupower/man/cpupower-idle-set.1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/cpupower/man/cpupower-idle-set.1 b/tools/power/cpupower/man/cpupower-idle-set.1 index 21916cff7516a..8cef3c71e19e7 100644 --- a/tools/power/cpupower/man/cpupower-idle-set.1 +++ b/tools/power/cpupower/man/cpupower-idle-set.1 @@ -4,7 +4,7 @@ cpupower\-idle\-set \- Utility to set cpu idle state specific kernel options .SH "SYNTAX" .LP -cpupower [ \-c cpulist ] idle\-info [\fIoptions\fP] +cpupower [ \-c cpulist ] idle\-set [\fIoptions\fP] .SH "DESCRIPTION" .LP The cpupower idle\-set subcommand allows to set cpu idle, also called cpu -- GitLab From 101025ff8e47d3c938ad2ae646a1794b9a8aa730 Mon Sep 17 00:00:00 2001 From: ozkanonur Date: Thu, 13 Jan 2022 00:04:21 +0300 Subject: [PATCH 0023/1586] tools/power/cpupower/{ToDo => TODO}: Rename the todo file Renamed the to-do file to 'TODO' instead of 'ToDo' to comply with the naming standard. 
Signed-off-by: ozkanonur Signed-off-by: Shuah Khan --- tools/power/cpupower/{ToDo => TODO} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tools/power/cpupower/{ToDo => TODO} (100%) diff --git a/tools/power/cpupower/ToDo b/tools/power/cpupower/TODO similarity index 100% rename from tools/power/cpupower/ToDo rename to tools/power/cpupower/TODO -- GitLab From 2e1f8e55f9e054b4a49ffc06c7e33b5d4725f05e Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 30 Sep 2021 14:40:38 +0200 Subject: [PATCH 0024/1586] x86/paravirt: Use %rip-relative addressing in hook calls While using a plain (constant) address works, its use needlessly invokes a SIB addressing mode, making every call site one byte larger than necessary: ff 14 25 98 89 42 82 call *0xffffffff82428998 Instead of using an "i" constraint with address-of operator and a 'c' operand modifier, simply use an ordinary "m" constraint, which the 64-bit compiler will translate to %rip-relative addressing: ff 15 62 fb d2 00 call *0xd2fb62(%rip) # ffffffff82428998 This way the compiler is also told the truth about operand usage - the memory location gets actually read, after all. 32-bit code generation is unaffected by the change. [ bp: Remove "we", add examples. 
] Signed-off-by: Jan Beulich Signed-off-by: Borislav Petkov Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/b8192e8a-13ef-6ac6-6364-8ba58992cd1d@suse.com --- arch/x86/include/asm/paravirt_types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index a69012e1903f1..e1591467668ed 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -279,7 +279,7 @@ extern void (*paravirt_iret)(void); #define paravirt_type(op) \ [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \ - [paravirt_opptr] "i" (&(pv_ops.op)) + [paravirt_opptr] "m" (pv_ops.op) #define paravirt_clobber(clobber) \ [paravirt_clobber] "i" (clobber) @@ -316,7 +316,7 @@ int paravirt_disable_iospace(void); */ #define PARAVIRT_CALL \ ANNOTATE_RETPOLINE_SAFE \ - "call *%c[paravirt_opptr];" + "call *%[paravirt_opptr];" /* * These macros are intended to wrap calls through one of the paravirt -- GitLab From e4544b63a7ee49e7fbebf35ece0a6acd3b9617ae Mon Sep 17 00:00:00 2001 From: Tim Murray Date: Fri, 7 Jan 2022 12:48:44 -0800 Subject: [PATCH 0025/1586] f2fs: move f2fs to use reader-unfair rwsems f2fs rw_semaphores work better if writers can starve readers, especially for the checkpoint thread, because writers are strictly more important than reader threads. This prevents significant priority inversion between low-priority readers that blocked while trying to acquire the read lock and a second acquisition of the write lock that might be blocking high priority work. 
Signed-off-by: Tim Murray Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 34 ++++++------- fs/f2fs/compress.c | 6 +-- fs/f2fs/data.c | 50 +++++++++---------- fs/f2fs/dir.c | 12 ++--- fs/f2fs/f2fs.h | 110 ++++++++++++++++++++++++++++++++++-------- fs/f2fs/file.c | 112 +++++++++++++++++++++---------------------- fs/f2fs/gc.c | 46 +++++++++--------- fs/f2fs/inline.c | 4 +- fs/f2fs/namei.c | 34 ++++++------- fs/f2fs/node.c | 84 ++++++++++++++++---------------- fs/f2fs/recovery.c | 4 +- fs/f2fs/segment.c | 44 ++++++++--------- fs/f2fs/super.c | 56 +++++++++++----------- fs/f2fs/sysfs.c | 4 +- fs/f2fs/verity.c | 4 +- fs/f2fs/xattr.c | 12 ++--- 16 files changed, 342 insertions(+), 274 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 982f0170639fc..deeda95688f03 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -351,13 +351,13 @@ static int f2fs_write_meta_pages(struct address_space *mapping, goto skip_write; /* if locked failed, cp will flush dirty pages instead */ - if (!down_write_trylock(&sbi->cp_global_sem)) + if (!f2fs_down_write_trylock(&sbi->cp_global_sem)) goto skip_write; trace_f2fs_writepages(mapping->host, wbc, META); diff = nr_pages_to_write(sbi, META, wbc); written = f2fs_sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO); - up_write(&sbi->cp_global_sem); + f2fs_up_write(&sbi->cp_global_sem); wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff); return 0; @@ -1159,7 +1159,7 @@ static bool __need_flush_quota(struct f2fs_sb_info *sbi) if (!is_journalled_quota(sbi)) return false; - if (!down_write_trylock(&sbi->quota_sem)) + if (!f2fs_down_write_trylock(&sbi->quota_sem)) return true; if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) { ret = false; @@ -1171,7 +1171,7 @@ static bool __need_flush_quota(struct f2fs_sb_info *sbi) } else if (get_pages(sbi, F2FS_DIRTY_QDATA)) { ret = true; } - up_write(&sbi->quota_sem); + f2fs_up_write(&sbi->quota_sem); return ret; } @@ -1228,10 +1228,10 @@ 
retry_flush_dents: * POR: we should ensure that there are no dirty node pages * until finishing nat/sit flush. inode->i_blocks can be updated. */ - down_write(&sbi->node_change); + f2fs_down_write(&sbi->node_change); if (get_pages(sbi, F2FS_DIRTY_IMETA)) { - up_write(&sbi->node_change); + f2fs_up_write(&sbi->node_change); f2fs_unlock_all(sbi); err = f2fs_sync_inode_meta(sbi); if (err) @@ -1241,15 +1241,15 @@ retry_flush_dents: } retry_flush_nodes: - down_write(&sbi->node_write); + f2fs_down_write(&sbi->node_write); if (get_pages(sbi, F2FS_DIRTY_NODES)) { - up_write(&sbi->node_write); + f2fs_up_write(&sbi->node_write); atomic_inc(&sbi->wb_sync_req[NODE]); err = f2fs_sync_node_pages(sbi, &wbc, false, FS_CP_NODE_IO); atomic_dec(&sbi->wb_sync_req[NODE]); if (err) { - up_write(&sbi->node_change); + f2fs_up_write(&sbi->node_change); f2fs_unlock_all(sbi); return err; } @@ -1262,13 +1262,13 @@ retry_flush_nodes: * dirty node blocks and some checkpoint values by block allocation. */ __prepare_cp_block(sbi); - up_write(&sbi->node_change); + f2fs_up_write(&sbi->node_change); return err; } static void unblock_operations(struct f2fs_sb_info *sbi) { - up_write(&sbi->node_write); + f2fs_up_write(&sbi->node_write); f2fs_unlock_all(sbi); } @@ -1612,7 +1612,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_warn(sbi, "Start checkpoint disabled!"); } if (cpc->reason != CP_RESIZE) - down_write(&sbi->cp_global_sem); + f2fs_down_write(&sbi->cp_global_sem); if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && ((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) || @@ -1693,7 +1693,7 @@ stop: trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); out: if (cpc->reason != CP_RESIZE) - up_write(&sbi->cp_global_sem); + f2fs_up_write(&sbi->cp_global_sem); return err; } @@ -1741,9 +1741,9 @@ static int __write_checkpoint_sync(struct f2fs_sb_info *sbi) struct cp_control cpc = { .reason = CP_SYNC, }; int err; - down_write(&sbi->gc_lock); + 
f2fs_down_write(&sbi->gc_lock); err = f2fs_write_checkpoint(sbi, &cpc); - up_write(&sbi->gc_lock); + f2fs_up_write(&sbi->gc_lock); return err; } @@ -1831,9 +1831,9 @@ int f2fs_issue_checkpoint(struct f2fs_sb_info *sbi) if (!test_opt(sbi, MERGE_CHECKPOINT) || cpc.reason != CP_SYNC) { int ret; - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); ret = f2fs_write_checkpoint(sbi, &cpc); - up_write(&sbi->gc_lock); + f2fs_up_write(&sbi->gc_lock); return ret; } diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index d0c3aeba59454..67bac2792e571 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1267,7 +1267,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, * checkpoint. This can only happen to quota writes which can cause * the below discard race condition. */ - down_read(&sbi->node_write); + f2fs_down_read(&sbi->node_write); } else if (!f2fs_trylock_op(sbi)) { goto out_free; } @@ -1384,7 +1384,7 @@ unlock_continue: f2fs_put_dnode(&dn); if (IS_NOQUOTA(inode)) - up_read(&sbi->node_write); + f2fs_up_read(&sbi->node_write); else f2fs_unlock_op(sbi); @@ -1410,7 +1410,7 @@ out_put_dnode: f2fs_put_dnode(&dn); out_unlock_op: if (IS_NOQUOTA(inode)) - up_read(&sbi->node_write); + f2fs_up_read(&sbi->node_write); else f2fs_unlock_op(sbi); out_free: diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8c417864c66ae..0f124e8de1d4e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -590,7 +590,7 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type btype = PAGE_TYPE_OF_BIO(type); struct f2fs_bio_info *io = sbi->write_io[btype] + temp; - down_write(&io->io_rwsem); + f2fs_down_write(&io->io_rwsem); /* change META to META_FLUSH in the checkpoint procedure */ if (type >= META_FLUSH) { @@ -601,7 +601,7 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi, io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA; } __submit_merged_bio(io); - up_write(&io->io_rwsem); + f2fs_up_write(&io->io_rwsem); } static void 
__submit_merged_write_cond(struct f2fs_sb_info *sbi, @@ -616,9 +616,9 @@ static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, enum page_type btype = PAGE_TYPE_OF_BIO(type); struct f2fs_bio_info *io = sbi->write_io[btype] + temp; - down_read(&io->io_rwsem); + f2fs_down_read(&io->io_rwsem); ret = __has_merged_page(io->bio, inode, page, ino); - up_read(&io->io_rwsem); + f2fs_up_read(&io->io_rwsem); } if (ret) __f2fs_submit_merged_write(sbi, type, temp); @@ -742,9 +742,9 @@ static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio, if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE) f2fs_bug_on(sbi, 1); - down_write(&io->bio_list_lock); + f2fs_down_write(&io->bio_list_lock); list_add_tail(&be->list, &io->bio_list); - up_write(&io->bio_list_lock); + f2fs_up_write(&io->bio_list_lock); } static void del_bio_entry(struct bio_entry *be) @@ -766,7 +766,7 @@ static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio, struct list_head *head = &io->bio_list; struct bio_entry *be; - down_write(&io->bio_list_lock); + f2fs_down_write(&io->bio_list_lock); list_for_each_entry(be, head, list) { if (be->bio != *bio) continue; @@ -790,7 +790,7 @@ static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio, __submit_bio(sbi, *bio, DATA); break; } - up_write(&io->bio_list_lock); + f2fs_up_write(&io->bio_list_lock); } if (ret) { @@ -816,7 +816,7 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, if (list_empty(head)) continue; - down_read(&io->bio_list_lock); + f2fs_down_read(&io->bio_list_lock); list_for_each_entry(be, head, list) { if (target) found = (target == be->bio); @@ -826,14 +826,14 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, if (found) break; } - up_read(&io->bio_list_lock); + f2fs_up_read(&io->bio_list_lock); if (!found) continue; found = false; - down_write(&io->bio_list_lock); + f2fs_down_write(&io->bio_list_lock); list_for_each_entry(be, head, list) { if (target) found = (target == be->bio); @@ -846,7 
+846,7 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, break; } } - up_write(&io->bio_list_lock); + f2fs_up_write(&io->bio_list_lock); } if (found) @@ -906,7 +906,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio) f2fs_bug_on(sbi, is_read_io(fio->op)); - down_write(&io->io_rwsem); + f2fs_down_write(&io->io_rwsem); next: if (fio->in_list) { spin_lock(&io->io_lock); @@ -973,7 +973,7 @@ out: if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) || !f2fs_is_checkpoint_ready(sbi)) __submit_merged_bio(io); - up_write(&io->io_rwsem); + f2fs_up_write(&io->io_rwsem); } static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, @@ -1383,9 +1383,9 @@ void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock) { if (flag == F2FS_GET_BLOCK_PRE_AIO) { if (lock) - down_read(&sbi->node_change); + f2fs_down_read(&sbi->node_change); else - up_read(&sbi->node_change); + f2fs_up_read(&sbi->node_change); } else { if (lock) f2fs_lock_op(sbi); @@ -2749,13 +2749,13 @@ write: * the below discard race condition. 
*/ if (IS_NOQUOTA(inode)) - down_read(&sbi->node_write); + f2fs_down_read(&sbi->node_write); fio.need_lock = LOCK_DONE; err = f2fs_do_write_data_page(&fio); if (IS_NOQUOTA(inode)) - up_read(&sbi->node_write); + f2fs_up_read(&sbi->node_write); goto done; } @@ -3213,14 +3213,14 @@ void f2fs_write_failed(struct inode *inode, loff_t to) /* In the fs-verity case, f2fs_end_enable_verity() does the truncate */ if (to > i_size && !f2fs_verity_in_progress(inode)) { - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); truncate_pagecache(inode, i_size); f2fs_truncate_blocks(inode, i_size, true); filemap_invalidate_unlock(inode->i_mapping); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); } } @@ -3721,13 +3721,13 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, unsigned int end_sec = secidx + blkcnt / blk_per_sec; int ret = 0; - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); set_inode_flag(inode, FI_ALIGNED_WRITE); for (; secidx < end_sec; secidx++) { - down_write(&sbi->pin_sem); + f2fs_down_write(&sbi->pin_sem); f2fs_lock_op(sbi); f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); @@ -3741,7 +3741,7 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, page = f2fs_get_lock_data_page(inode, blkidx, true); if (IS_ERR(page)) { - up_write(&sbi->pin_sem); + f2fs_up_write(&sbi->pin_sem); ret = PTR_ERR(page); goto done; } @@ -3754,7 +3754,7 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, ret = filemap_fdatawrite(inode->i_mapping); - up_write(&sbi->pin_sem); + f2fs_up_write(&sbi->pin_sem); if (ret) break; @@ -3765,7 +3765,7 @@ done: clear_inode_flag(inode, FI_ALIGNED_WRITE); filemap_invalidate_unlock(inode->i_mapping); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 
+ f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); return ret; } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 1820e9c106f7d..011df7058c42b 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -766,7 +766,7 @@ add_dentry: f2fs_wait_on_page_writeback(dentry_page, DATA, true, true); if (inode) { - down_write(&F2FS_I(inode)->i_sem); + f2fs_down_write(&F2FS_I(inode)->i_sem); page = f2fs_init_inode_metadata(inode, dir, fname, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); @@ -793,7 +793,7 @@ add_dentry: f2fs_update_parent_metadata(dir, inode, current_depth); fail: if (inode) - up_write(&F2FS_I(inode)->i_sem); + f2fs_up_write(&F2FS_I(inode)->i_sem); f2fs_put_page(dentry_page, 1); @@ -858,7 +858,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) struct page *page; int err = 0; - down_write(&F2FS_I(inode)->i_sem); + f2fs_down_write(&F2FS_I(inode)->i_sem); page = f2fs_init_inode_metadata(inode, dir, NULL, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); @@ -869,7 +869,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) clear_inode_flag(inode, FI_NEW_INODE); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); fail: - up_write(&F2FS_I(inode)->i_sem); + f2fs_up_write(&F2FS_I(inode)->i_sem); return err; } @@ -877,7 +877,7 @@ void f2fs_drop_nlink(struct inode *dir, struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); - down_write(&F2FS_I(inode)->i_sem); + f2fs_down_write(&F2FS_I(inode)->i_sem); if (S_ISDIR(inode->i_mode)) f2fs_i_links_write(dir, false); @@ -888,7 +888,7 @@ void f2fs_drop_nlink(struct inode *dir, struct inode *inode) f2fs_i_links_write(inode, false); f2fs_i_size_write(inode, 0); } - up_write(&F2FS_I(inode)->i_sem); + f2fs_up_write(&F2FS_I(inode)->i_sem); if (inode->i_nlink == 0) f2fs_add_orphan_inode(inode); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index eb22fa91c2b26..8178a9152e496 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -123,6 +123,18 @@ typedef u32 nid_t; #define COMPRESS_EXT_NUM 16 +/* + * An implementation 
of an rwsem that is explicitly unfair to readers. This + * prevents priority inversion when a low-priority reader acquires the read lock + * while sleeping on the write lock but the write lock is needed by + * higher-priority clients. + */ + +struct f2fs_rwsem { + struct rw_semaphore internal_rwsem; + wait_queue_head_t read_waiters; +}; + struct f2fs_mount_info { unsigned int opt; int write_io_size_bits; /* Write IO size bits */ @@ -752,7 +764,7 @@ struct f2fs_inode_info { /* Use below internally in f2fs*/ unsigned long flags[BITS_TO_LONGS(FI_MAX)]; /* use to pass per-file flags */ - struct rw_semaphore i_sem; /* protect fi info */ + struct f2fs_rwsem i_sem; /* protect fi info */ atomic_t dirty_pages; /* # of dirty pages */ f2fs_hash_t chash; /* hash value of given file name */ unsigned int clevel; /* maximum level of given file name */ @@ -777,8 +789,8 @@ struct f2fs_inode_info { struct extent_tree *extent_tree; /* cached extent_tree entry */ /* avoid racing between foreground op and gc */ - struct rw_semaphore i_gc_rwsem[2]; - struct rw_semaphore i_xattr_sem; /* avoid racing between reading and changing EAs */ + struct f2fs_rwsem i_gc_rwsem[2]; + struct f2fs_rwsem i_xattr_sem; /* avoid racing between reading and changing EAs */ int i_extra_isize; /* size of extra space located in i_addr */ kprojid_t i_projid; /* id for project quota */ @@ -904,7 +916,7 @@ struct f2fs_nm_info { /* NAT cache management */ struct radix_tree_root nat_root;/* root of the nat entry cache */ struct radix_tree_root nat_set_root;/* root of the nat set cache */ - struct rw_semaphore nat_tree_lock; /* protect nat entry tree */ + struct f2fs_rwsem nat_tree_lock; /* protect nat entry tree */ struct list_head nat_entries; /* cached nat entry list (clean) */ spinlock_t nat_list_lock; /* protect clean nat entry list */ unsigned int nat_cnt[MAX_NAT_STATE]; /* the # of cached nat entries */ @@ -1017,7 +1029,7 @@ struct f2fs_sm_info { struct dirty_seglist_info *dirty_info; /* dirty segment 
information */ struct curseg_info *curseg_array; /* active segment information */ - struct rw_semaphore curseg_lock; /* for preventing curseg change */ + struct f2fs_rwsem curseg_lock; /* for preventing curseg change */ block_t seg0_blkaddr; /* block address of 0'th segment */ block_t main_blkaddr; /* start block address of main area */ @@ -1201,11 +1213,11 @@ struct f2fs_bio_info { struct bio *bio; /* bios to merge */ sector_t last_block_in_bio; /* last block number */ struct f2fs_io_info fio; /* store buffered io info. */ - struct rw_semaphore io_rwsem; /* blocking op for bio */ + struct f2fs_rwsem io_rwsem; /* blocking op for bio */ spinlock_t io_lock; /* serialize DATA/NODE IOs */ struct list_head io_list; /* track fios */ struct list_head bio_list; /* bio entry list head */ - struct rw_semaphore bio_list_lock; /* lock to protect bio entry list */ + struct f2fs_rwsem bio_list_lock; /* lock to protect bio entry list */ }; #define FDEV(i) (sbi->devs[i]) @@ -1571,7 +1583,7 @@ struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ struct proc_dir_entry *s_proc; /* proc entry */ struct f2fs_super_block *raw_super; /* raw super block pointer */ - struct rw_semaphore sb_lock; /* lock for raw super block */ + struct f2fs_rwsem sb_lock; /* lock for raw super block */ int valid_super_block; /* valid super block no */ unsigned long s_flag; /* flags for sbi */ struct mutex writepages; /* mutex for writepages() */ @@ -1591,7 +1603,7 @@ struct f2fs_sb_info { /* for bio operations */ struct f2fs_bio_info *write_io[NR_PAGE_TYPE]; /* for write bios */ /* keep migration IO order for LFS mode */ - struct rw_semaphore io_order_lock; + struct f2fs_rwsem io_order_lock; mempool_t *write_io_dummy; /* Dummy pages */ /* for checkpoint */ @@ -1599,10 +1611,10 @@ struct f2fs_sb_info { int cur_cp_pack; /* remain current cp pack */ spinlock_t cp_lock; /* for flag in ckpt */ struct inode *meta_inode; /* cache meta blocks */ - struct rw_semaphore cp_global_sem; /* 
checkpoint procedure lock */ - struct rw_semaphore cp_rwsem; /* blocking FS operations */ - struct rw_semaphore node_write; /* locking node writes */ - struct rw_semaphore node_change; /* locking node change */ + struct f2fs_rwsem cp_global_sem; /* checkpoint procedure lock */ + struct f2fs_rwsem cp_rwsem; /* blocking FS operations */ + struct f2fs_rwsem node_write; /* locking node writes */ + struct f2fs_rwsem node_change; /* locking node change */ wait_queue_head_t cp_wait; unsigned long last_time[MAX_TIME]; /* to store time in jiffies */ long interval_time[MAX_TIME]; /* to store thresholds */ @@ -1662,7 +1674,7 @@ struct f2fs_sb_info { block_t unusable_block_count; /* # of blocks saved by last cp */ unsigned int nquota_files; /* # of quota sysfile */ - struct rw_semaphore quota_sem; /* blocking cp for flags */ + struct f2fs_rwsem quota_sem; /* blocking cp for flags */ /* # of pages, see count_type */ atomic_t nr_pages[NR_COUNT_TYPE]; @@ -1678,7 +1690,7 @@ struct f2fs_sb_info { struct f2fs_mount_info mount_opt; /* mount options */ /* for cleaning operations */ - struct rw_semaphore gc_lock; /* + struct f2fs_rwsem gc_lock; /* * semaphore for GC, avoid * race between GC and GC or CP */ @@ -1698,7 +1710,7 @@ struct f2fs_sb_info { /* threshold for gc trials on pinned files */ u64 gc_pin_file_threshold; - struct rw_semaphore pin_sem; + struct f2fs_rwsem pin_sem; /* maximum # of trials to find a victim segment for SSR and GC */ unsigned int max_victim_search; @@ -2092,9 +2104,65 @@ static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f) spin_unlock_irqrestore(&sbi->cp_lock, flags); } +static inline void init_f2fs_rwsem(struct f2fs_rwsem *sem) +{ + init_rwsem(&sem->internal_rwsem); + init_waitqueue_head(&sem->read_waiters); +} + +static inline int f2fs_rwsem_is_locked(struct f2fs_rwsem *sem) +{ + return rwsem_is_locked(&sem->internal_rwsem); +} + +static inline int f2fs_rwsem_is_contended(struct f2fs_rwsem *sem) +{ + return 
rwsem_is_contended(&sem->internal_rwsem); +} + +static inline void f2fs_down_read(struct f2fs_rwsem *sem) +{ + wait_event(sem->read_waiters, down_read_trylock(&sem->internal_rwsem)); +} + +static inline int f2fs_down_read_trylock(struct f2fs_rwsem *sem) +{ + return down_read_trylock(&sem->internal_rwsem); +} + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static inline void f2fs_down_read_nested(struct f2fs_rwsem *sem, int subclass) +{ + down_read_nested(&sem->internal_rwsem, subclass); +} +#else +#define f2fs_down_read_nested(sem, subclass) f2fs_down_read(sem) +#endif + +static inline void f2fs_up_read(struct f2fs_rwsem *sem) +{ + up_read(&sem->internal_rwsem); +} + +static inline void f2fs_down_write(struct f2fs_rwsem *sem) +{ + down_write(&sem->internal_rwsem); +} + +static inline int f2fs_down_write_trylock(struct f2fs_rwsem *sem) +{ + return down_write_trylock(&sem->internal_rwsem); +} + +static inline void f2fs_up_write(struct f2fs_rwsem *sem) +{ + up_write(&sem->internal_rwsem); + wake_up_all(&sem->read_waiters); +} + static inline void f2fs_lock_op(struct f2fs_sb_info *sbi) { - down_read(&sbi->cp_rwsem); + f2fs_down_read(&sbi->cp_rwsem); } static inline int f2fs_trylock_op(struct f2fs_sb_info *sbi) @@ -2103,22 +2171,22 @@ static inline int f2fs_trylock_op(struct f2fs_sb_info *sbi) f2fs_show_injection_info(sbi, FAULT_LOCK_OP); return 0; } - return down_read_trylock(&sbi->cp_rwsem); + return f2fs_down_read_trylock(&sbi->cp_rwsem); } static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi) { - up_read(&sbi->cp_rwsem); + f2fs_up_read(&sbi->cp_rwsem); } static inline void f2fs_lock_all(struct f2fs_sb_info *sbi) { - down_write(&sbi->cp_rwsem); + f2fs_down_write(&sbi->cp_rwsem); } static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi) { - up_write(&sbi->cp_rwsem); + f2fs_up_write(&sbi->cp_rwsem); } static inline int __get_cp_reason(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 3c98ef6af97d1..6ccdd6e347e2c 100644 --- a/fs/f2fs/file.c +++ 
b/fs/f2fs/file.c @@ -237,13 +237,13 @@ static void try_to_fix_pino(struct inode *inode) struct f2fs_inode_info *fi = F2FS_I(inode); nid_t pino; - down_write(&fi->i_sem); + f2fs_down_write(&fi->i_sem); if (file_wrong_pino(inode) && inode->i_nlink == 1 && get_parent_ino(inode, &pino)) { f2fs_i_pino_write(inode, pino); file_got_pino(inode); } - up_write(&fi->i_sem); + f2fs_up_write(&fi->i_sem); } static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, @@ -318,9 +318,9 @@ go_write: * Both of fdatasync() and fsync() are able to be recovered from * sudden-power-off. */ - down_read(&F2FS_I(inode)->i_sem); + f2fs_down_read(&F2FS_I(inode)->i_sem); cp_reason = need_do_checkpoint(inode); - up_read(&F2FS_I(inode)->i_sem); + f2fs_up_read(&F2FS_I(inode)->i_sem); if (cp_reason) { /* all the dirty node pages should be flushed for POR */ @@ -958,7 +958,7 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, return err; } - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); truncate_setsize(inode, attr->ia_size); @@ -970,7 +970,7 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, * larger than i_size. 
*/ filemap_invalidate_unlock(inode->i_mapping); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); if (err) return err; @@ -1112,7 +1112,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) blk_start = (loff_t)pg_start << PAGE_SHIFT; blk_end = (loff_t)pg_end << PAGE_SHIFT; - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); truncate_pagecache_range(inode, blk_start, blk_end - 1); @@ -1122,7 +1122,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) f2fs_unlock_op(sbi); filemap_invalidate_unlock(inode->i_mapping); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); } } @@ -1355,7 +1355,7 @@ static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len) f2fs_balance_fs(sbi, true); /* avoid gc operation during block exchange */ - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); f2fs_lock_op(sbi); @@ -1365,7 +1365,7 @@ static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len) f2fs_unlock_op(sbi); filemap_invalidate_unlock(inode->i_mapping); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); return ret; } @@ -1500,7 +1500,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, unsigned int end_offset; pgoff_t end; - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(mapping); truncate_pagecache_range(inode, @@ -1514,7 +1514,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, if (ret) { f2fs_unlock_op(sbi); filemap_invalidate_unlock(mapping); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); goto 
out; } @@ -1526,7 +1526,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, f2fs_unlock_op(sbi); filemap_invalidate_unlock(mapping); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); f2fs_balance_fs(sbi, dn.node_changed); @@ -1600,7 +1600,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); /* avoid gc operation during block exchange */ - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(mapping); truncate_pagecache(inode, offset); @@ -1618,7 +1618,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) f2fs_unlock_op(sbi); } filemap_invalidate_unlock(mapping); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); /* write out all moved pages, if possible */ filemap_invalidate_lock(mapping); @@ -1674,13 +1674,13 @@ static int expand_inode_data(struct inode *inode, loff_t offset, next_alloc: if (has_not_enough_free_secs(sbi, 0, GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) { - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); err = f2fs_gc(sbi, true, false, false, NULL_SEGNO); if (err && err != -ENODATA && err != -EAGAIN) goto out_err; } - down_write(&sbi->pin_sem); + f2fs_down_write(&sbi->pin_sem); f2fs_lock_op(sbi); f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); @@ -1690,7 +1690,7 @@ next_alloc: err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO); file_dont_truncate(inode); - up_write(&sbi->pin_sem); + f2fs_up_write(&sbi->pin_sem); expanded += map.m_len; sec_len -= map.m_len; @@ -2020,7 +2020,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) if (ret) goto out; - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); /* * Should wait end_io to count F2FS_WB_CP_DATA 
correctly by @@ -2031,7 +2031,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) inode->i_ino, get_dirty_pages(inode)); ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); if (ret) { - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); goto out; } @@ -2044,7 +2044,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) /* add inode in inmem_list first and set atomic_file */ set_inode_flag(inode, FI_ATOMIC_FILE); clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); F2FS_I(inode)->inmem_task = current; @@ -2351,7 +2351,7 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) if (err) return err; - down_write(&sbi->sb_lock); + f2fs_down_write(&sbi->sb_lock); if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt)) goto got_it; @@ -2370,7 +2370,7 @@ got_it: 16)) err = -EFAULT; out_err: - up_write(&sbi->sb_lock); + f2fs_up_write(&sbi->sb_lock); mnt_drop_write_file(filp); return err; } @@ -2447,12 +2447,12 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg) return ret; if (!sync) { - if (!down_write_trylock(&sbi->gc_lock)) { + if (!f2fs_down_write_trylock(&sbi->gc_lock)) { ret = -EBUSY; goto out; } } else { - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); } ret = f2fs_gc(sbi, sync, true, false, NULL_SEGNO); @@ -2483,12 +2483,12 @@ static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range) do_more: if (!range->sync) { - if (!down_write_trylock(&sbi->gc_lock)) { + if (!f2fs_down_write_trylock(&sbi->gc_lock)) { ret = -EBUSY; goto out; } } else { - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); } ret = f2fs_gc(sbi, range->sync, true, false, @@ -2820,10 +2820,10 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, f2fs_balance_fs(sbi, true); - 
down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]); if (src != dst) { ret = -EBUSY; - if (!down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE])) + if (!f2fs_down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE])) goto out_src; } @@ -2841,9 +2841,9 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, f2fs_unlock_op(sbi); if (src != dst) - up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]); out_src: - up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]); out_unlock: if (src != dst) inode_unlock(dst); @@ -2938,7 +2938,7 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) end_segno = min(start_segno + range.segments, dev_end_segno); while (start_segno < end_segno) { - if (!down_write_trylock(&sbi->gc_lock)) { + if (!f2fs_down_write_trylock(&sbi->gc_lock)) { ret = -EBUSY; goto out; } @@ -3215,9 +3215,9 @@ int f2fs_precache_extents(struct inode *inode) while (map.m_lblk < end) { map.m_len = end - map.m_lblk; - down_write(&fi->i_gc_rwsem[WRITE]); + f2fs_down_write(&fi->i_gc_rwsem[WRITE]); err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_PRECACHE); - up_write(&fi->i_gc_rwsem[WRITE]); + f2fs_up_write(&fi->i_gc_rwsem[WRITE]); if (err) return err; @@ -3294,11 +3294,11 @@ static int f2fs_ioc_getfslabel(struct file *filp, unsigned long arg) if (!vbuf) return -ENOMEM; - down_read(&sbi->sb_lock); + f2fs_down_read(&sbi->sb_lock); count = utf16s_to_utf8s(sbi->raw_super->volume_name, ARRAY_SIZE(sbi->raw_super->volume_name), UTF16_LITTLE_ENDIAN, vbuf, MAX_VOLUME_NAME); - up_read(&sbi->sb_lock); + f2fs_up_read(&sbi->sb_lock); if (copy_to_user((char __user *)arg, vbuf, min(FSLABEL_MAX, count))) @@ -3326,7 +3326,7 @@ static int f2fs_ioc_setfslabel(struct file *filp, unsigned long arg) if (err) goto out; - down_write(&sbi->sb_lock); + f2fs_down_write(&sbi->sb_lock); memset(sbi->raw_super->volume_name, 0, 
sizeof(sbi->raw_super->volume_name)); @@ -3336,7 +3336,7 @@ static int f2fs_ioc_setfslabel(struct file *filp, unsigned long arg) err = f2fs_commit_super(sbi, false); - up_write(&sbi->sb_lock); + f2fs_up_write(&sbi->sb_lock); mnt_drop_write_file(filp); out: @@ -3462,7 +3462,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) if (!atomic_read(&F2FS_I(inode)->i_compr_blocks)) goto out; - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); @@ -3499,7 +3499,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) } filemap_invalidate_unlock(inode->i_mapping); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); out: inode_unlock(inode); @@ -3615,7 +3615,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) goto unlock_inode; } - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); @@ -3652,7 +3652,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) } filemap_invalidate_unlock(inode->i_mapping); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); if (ret >= 0) { clear_inode_flag(inode, FI_COMPRESS_RELEASED); @@ -3770,7 +3770,7 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg) if (ret) goto err; - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(mapping); ret = filemap_write_and_wait_range(mapping, range.start, @@ -3859,7 +3859,7 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg) prev_block, len, range.flags); out: filemap_invalidate_unlock(mapping); - 
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); err: inode_unlock(inode); file_end_write(filp); @@ -4291,12 +4291,12 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) trace_f2fs_direct_IO_enter(inode, iocb, count, READ); if (iocb->ki_flags & IOCB_NOWAIT) { - if (!down_read_trylock(&fi->i_gc_rwsem[READ])) { + if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) { ret = -EAGAIN; goto out; } } else { - down_read(&fi->i_gc_rwsem[READ]); + f2fs_down_read(&fi->i_gc_rwsem[READ]); } /* @@ -4315,7 +4315,7 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) ret = iomap_dio_complete(dio); } - up_read(&fi->i_gc_rwsem[READ]); + f2fs_up_read(&fi->i_gc_rwsem[READ]); file_accessed(file); out: @@ -4497,12 +4497,12 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from, goto out; } - if (!down_read_trylock(&fi->i_gc_rwsem[WRITE])) { + if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[WRITE])) { ret = -EAGAIN; goto out; } - if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) { - up_read(&fi->i_gc_rwsem[WRITE]); + if (do_opu && !f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) { + f2fs_up_read(&fi->i_gc_rwsem[WRITE]); ret = -EAGAIN; goto out; } @@ -4511,9 +4511,9 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from, if (ret) goto out; - down_read(&fi->i_gc_rwsem[WRITE]); + f2fs_down_read(&fi->i_gc_rwsem[WRITE]); if (do_opu) - down_read(&fi->i_gc_rwsem[READ]); + f2fs_down_read(&fi->i_gc_rwsem[READ]); } if (whint_mode == WHINT_MODE_OFF) iocb->ki_hint = WRITE_LIFE_NOT_SET; @@ -4542,8 +4542,8 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from, if (whint_mode == WHINT_MODE_OFF) iocb->ki_hint = hint; if (do_opu) - up_read(&fi->i_gc_rwsem[READ]); - up_read(&fi->i_gc_rwsem[WRITE]); + f2fs_up_read(&fi->i_gc_rwsem[READ]); + f2fs_up_read(&fi->i_gc_rwsem[WRITE]); if (ret < 0) goto out; @@ -4644,12 +4644,12 @@ static 
ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) /* Don't leave any preallocated blocks around past i_size. */ if (preallocated && i_size_read(inode) < target_size) { - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); if (!f2fs_truncate(inode)) file_dont_truncate(inode); filemap_invalidate_unlock(inode->i_mapping); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); } else { file_dont_truncate(inode); } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index ee308a8de4327..0a6b0a8ae97ee 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -105,21 +105,21 @@ static int gc_thread_func(void *data) spin_unlock(&sbi->gc_urgent_high_lock); wait_ms = gc_th->urgent_sleep_time; - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); goto do_gc; } if (foreground) { - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); goto do_gc; - } else if (!down_write_trylock(&sbi->gc_lock)) { + } else if (!f2fs_down_write_trylock(&sbi->gc_lock)) { stat_other_skip_bggc_count(sbi); goto next; } if (!is_idle(sbi, GC_TIME)) { increase_sleep_time(gc_th, &wait_ms); - up_write(&sbi->gc_lock); + f2fs_up_write(&sbi->gc_lock); stat_io_skip_bggc_count(sbi); goto next; } @@ -1230,7 +1230,7 @@ static int move_data_block(struct inode *inode, block_t bidx, fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr; if (lfs_mode) - down_write(&fio.sbi->io_order_lock); + f2fs_down_write(&fio.sbi->io_order_lock); mpage = f2fs_grab_cache_page(META_MAPPING(fio.sbi), fio.old_blkaddr, false); @@ -1316,7 +1316,7 @@ recover_block: true, true, true); up_out: if (lfs_mode) - up_write(&fio.sbi->io_order_lock); + f2fs_up_write(&fio.sbi->io_order_lock); put_out: f2fs_put_dnode(&dn); out: @@ -1475,7 +1475,7 @@ next_step: special_file(inode->i_mode)) continue; - if (!down_write_trylock( + if (!f2fs_down_write_trylock( &F2FS_I(inode)->i_gc_rwsem[WRITE])) { 
iput(inode); sbi->skipped_gc_rwsem++; @@ -1488,7 +1488,7 @@ next_step: if (f2fs_post_read_required(inode)) { int err = ra_data_block(inode, start_bidx); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); if (err) { iput(inode); continue; @@ -1499,7 +1499,7 @@ next_step: data_page = f2fs_get_read_data_page(inode, start_bidx, REQ_RAHEAD, true); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); if (IS_ERR(data_page)) { iput(inode); continue; @@ -1518,14 +1518,14 @@ next_step: int err; if (S_ISREG(inode->i_mode)) { - if (!down_write_trylock(&fi->i_gc_rwsem[READ])) { + if (!f2fs_down_write_trylock(&fi->i_gc_rwsem[READ])) { sbi->skipped_gc_rwsem++; continue; } - if (!down_write_trylock( + if (!f2fs_down_write_trylock( &fi->i_gc_rwsem[WRITE])) { sbi->skipped_gc_rwsem++; - up_write(&fi->i_gc_rwsem[READ]); + f2fs_up_write(&fi->i_gc_rwsem[READ]); continue; } locked = true; @@ -1548,8 +1548,8 @@ next_step: submitted++; if (locked) { - up_write(&fi->i_gc_rwsem[WRITE]); - up_write(&fi->i_gc_rwsem[READ]); + f2fs_up_write(&fi->i_gc_rwsem[WRITE]); + f2fs_up_write(&fi->i_gc_rwsem[READ]); } stat_inc_data_blk_count(sbi, 1, gc_type); @@ -1807,7 +1807,7 @@ stop: reserved_segments(sbi), prefree_segments(sbi)); - up_write(&sbi->gc_lock); + f2fs_up_write(&sbi->gc_lock); put_gc_inode(&gc_list); @@ -1936,7 +1936,7 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs) long long block_count; int segs = secs * sbi->segs_per_sec; - down_write(&sbi->sb_lock); + f2fs_down_write(&sbi->sb_lock); section_count = le32_to_cpu(raw_sb->section_count); segment_count = le32_to_cpu(raw_sb->segment_count); @@ -1957,7 +1957,7 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs) cpu_to_le32(dev_segs + segs); } - up_write(&sbi->sb_lock); + f2fs_up_write(&sbi->sb_lock); } static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs) @@ -2031,7 +2031,7 @@ int 
f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) secs = div_u64(shrunk_blocks, BLKS_PER_SEC(sbi)); /* stop other GC */ - if (!down_write_trylock(&sbi->gc_lock)) + if (!f2fs_down_write_trylock(&sbi->gc_lock)) return -EAGAIN; /* stop CP to protect MAIN_SEC in free_segment_range */ @@ -2051,15 +2051,15 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) out_unlock: f2fs_unlock_op(sbi); - up_write(&sbi->gc_lock); + f2fs_up_write(&sbi->gc_lock); if (err) return err; set_sbi_flag(sbi, SBI_IS_RESIZEFS); freeze_super(sbi->sb); - down_write(&sbi->gc_lock); - down_write(&sbi->cp_global_sem); + f2fs_down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->cp_global_sem); spin_lock(&sbi->stat_lock); if (shrunk_blocks + valid_user_blocks(sbi) + @@ -2104,8 +2104,8 @@ recover_out: spin_unlock(&sbi->stat_lock); } out_err: - up_write(&sbi->cp_global_sem); - up_write(&sbi->gc_lock); + f2fs_up_write(&sbi->cp_global_sem); + f2fs_up_write(&sbi->gc_lock); thaw_super(sbi->sb); clear_sbi_flag(sbi, SBI_IS_RESIZEFS); return err; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 4b5cefa3f90c1..a578bf83b803b 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -629,7 +629,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname, } if (inode) { - down_write(&F2FS_I(inode)->i_sem); + f2fs_down_write(&F2FS_I(inode)->i_sem); page = f2fs_init_inode_metadata(inode, dir, fname, ipage); if (IS_ERR(page)) { err = PTR_ERR(page); @@ -658,7 +658,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname, f2fs_update_parent_metadata(dir, inode, 0); fail: if (inode) - up_write(&F2FS_I(inode)->i_sem); + f2fs_up_write(&F2FS_I(inode)->i_sem); out: f2fs_put_page(ipage, 1); return err; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index a728a0af9ce0c..0347c5780910a 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -196,7 +196,7 @@ static inline void set_file_temperature(struct f2fs_sb_info *sbi, struct inode * __u8 
(*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list; int i, cold_count, hot_count; - down_read(&sbi->sb_lock); + f2fs_down_read(&sbi->sb_lock); cold_count = le32_to_cpu(sbi->raw_super->extension_count); hot_count = sbi->raw_super->hot_ext_count; @@ -206,7 +206,7 @@ static inline void set_file_temperature(struct f2fs_sb_info *sbi, struct inode * break; } - up_read(&sbi->sb_lock); + f2fs_up_read(&sbi->sb_lock); if (i == cold_count + hot_count) return; @@ -299,19 +299,19 @@ static void set_compress_inode(struct f2fs_sb_info *sbi, struct inode *inode, (!ext_cnt && !noext_cnt)) return; - down_read(&sbi->sb_lock); + f2fs_down_read(&sbi->sb_lock); cold_count = le32_to_cpu(sbi->raw_super->extension_count); hot_count = sbi->raw_super->hot_ext_count; for (i = cold_count; i < cold_count + hot_count; i++) { if (is_extension_exist(name, extlist[i], false)) { - up_read(&sbi->sb_lock); + f2fs_up_read(&sbi->sb_lock); return; } } - up_read(&sbi->sb_lock); + f2fs_up_read(&sbi->sb_lock); for (i = 0; i < noext_cnt; i++) { if (is_extension_exist(name, noext[i], false)) { @@ -1023,11 +1023,11 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, new_page = NULL; new_inode->i_ctime = current_time(new_inode); - down_write(&F2FS_I(new_inode)->i_sem); + f2fs_down_write(&F2FS_I(new_inode)->i_sem); if (old_dir_entry) f2fs_i_links_write(new_inode, false); f2fs_i_links_write(new_inode, false); - up_write(&F2FS_I(new_inode)->i_sem); + f2fs_up_write(&F2FS_I(new_inode)->i_sem); if (!new_inode->i_nlink) f2fs_add_orphan_inode(new_inode); @@ -1048,13 +1048,13 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_i_links_write(new_dir, true); } - down_write(&F2FS_I(old_inode)->i_sem); + f2fs_down_write(&F2FS_I(old_inode)->i_sem); if (!old_dir_entry || whiteout) file_lost_pino(old_inode); else /* adjust dir's i_pino to pass fsck check */ f2fs_i_pino_write(old_inode, new_dir->i_ino); - up_write(&F2FS_I(old_inode)->i_sem); + 
f2fs_up_write(&F2FS_I(old_inode)->i_sem); old_inode->i_ctime = current_time(old_inode); f2fs_mark_inode_dirty_sync(old_inode, false); @@ -1214,38 +1214,38 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, /* update directory entry info of old dir inode */ f2fs_set_link(old_dir, old_entry, old_page, new_inode); - down_write(&F2FS_I(old_inode)->i_sem); + f2fs_down_write(&F2FS_I(old_inode)->i_sem); if (!old_dir_entry) file_lost_pino(old_inode); else /* adjust dir's i_pino to pass fsck check */ f2fs_i_pino_write(old_inode, new_dir->i_ino); - up_write(&F2FS_I(old_inode)->i_sem); + f2fs_up_write(&F2FS_I(old_inode)->i_sem); old_dir->i_ctime = current_time(old_dir); if (old_nlink) { - down_write(&F2FS_I(old_dir)->i_sem); + f2fs_down_write(&F2FS_I(old_dir)->i_sem); f2fs_i_links_write(old_dir, old_nlink > 0); - up_write(&F2FS_I(old_dir)->i_sem); + f2fs_up_write(&F2FS_I(old_dir)->i_sem); } f2fs_mark_inode_dirty_sync(old_dir, false); /* update directory entry info of new dir inode */ f2fs_set_link(new_dir, new_entry, new_page, old_inode); - down_write(&F2FS_I(new_inode)->i_sem); + f2fs_down_write(&F2FS_I(new_inode)->i_sem); if (!new_dir_entry) file_lost_pino(new_inode); else /* adjust dir's i_pino to pass fsck check */ f2fs_i_pino_write(new_inode, old_dir->i_ino); - up_write(&F2FS_I(new_inode)->i_sem); + f2fs_up_write(&F2FS_I(new_inode)->i_sem); new_dir->i_ctime = current_time(new_dir); if (new_nlink) { - down_write(&F2FS_I(new_dir)->i_sem); + f2fs_down_write(&F2FS_I(new_dir)->i_sem); f2fs_i_links_write(new_dir, new_nlink > 0); - up_write(&F2FS_I(new_dir)->i_sem); + f2fs_up_write(&F2FS_I(new_dir)->i_sem); } f2fs_mark_inode_dirty_sync(new_dir, false); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 50b2874e758c9..93512f8859d5d 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -382,14 +382,14 @@ int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid) struct nat_entry *e; bool need = false; - down_read(&nm_i->nat_tree_lock); + 
f2fs_down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e) { if (!get_nat_flag(e, IS_CHECKPOINTED) && !get_nat_flag(e, HAS_FSYNCED_INODE)) need = true; } - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); return need; } @@ -399,11 +399,11 @@ bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) struct nat_entry *e; bool is_cp = true; - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e && !get_nat_flag(e, IS_CHECKPOINTED)) is_cp = false; - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); return is_cp; } @@ -413,13 +413,13 @@ bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) struct nat_entry *e; bool need_update = true; - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ino); if (e && get_nat_flag(e, HAS_LAST_FSYNC) && (get_nat_flag(e, IS_CHECKPOINTED) || get_nat_flag(e, HAS_FSYNCED_INODE))) need_update = false; - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); return need_update; } @@ -431,14 +431,14 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid, struct nat_entry *new, *e; /* Let's mitigate lock contention of nat_tree_lock during checkpoint */ - if (rwsem_is_locked(&sbi->cp_global_sem)) + if (f2fs_rwsem_is_locked(&sbi->cp_global_sem)) return; new = __alloc_nat_entry(sbi, nid, false); if (!new) return; - down_write(&nm_i->nat_tree_lock); + f2fs_down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (!e) e = __init_nat_entry(nm_i, new, ne, false); @@ -447,7 +447,7 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid, nat_get_blkaddr(e) != le32_to_cpu(ne->block_addr) || nat_get_version(e) != ne->version); - up_write(&nm_i->nat_tree_lock); + f2fs_up_write(&nm_i->nat_tree_lock); if (e != new) __free_nat_entry(new); } @@ -459,7 +459,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, 
struct node_info *ni, struct nat_entry *e; struct nat_entry *new = __alloc_nat_entry(sbi, ni->nid, true); - down_write(&nm_i->nat_tree_lock); + f2fs_down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ni->nid); if (!e) { e = __init_nat_entry(nm_i, new, NULL, true); @@ -508,7 +508,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, set_nat_flag(e, HAS_FSYNCED_INODE, true); set_nat_flag(e, HAS_LAST_FSYNC, fsync_done); } - up_write(&nm_i->nat_tree_lock); + f2fs_up_write(&nm_i->nat_tree_lock); } int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) @@ -516,7 +516,7 @@ int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) struct f2fs_nm_info *nm_i = NM_I(sbi); int nr = nr_shrink; - if (!down_write_trylock(&nm_i->nat_tree_lock)) + if (!f2fs_down_write_trylock(&nm_i->nat_tree_lock)) return 0; spin_lock(&nm_i->nat_list_lock); @@ -538,7 +538,7 @@ int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) } spin_unlock(&nm_i->nat_list_lock); - up_write(&nm_i->nat_tree_lock); + f2fs_up_write(&nm_i->nat_tree_lock); return nr - nr_shrink; } @@ -560,13 +560,13 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, ni->nid = nid; retry: /* Check nat cache */ - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e) { ni->ino = nat_get_ino(e); ni->blk_addr = nat_get_blkaddr(e); ni->version = nat_get_version(e); - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); return 0; } @@ -576,11 +576,11 @@ retry: * nat_tree_lock. Therefore, we should retry, if we failed to grab here * while not bothering checkpoint. 
*/ - if (!rwsem_is_locked(&sbi->cp_global_sem) || checkpoint_context) { + if (!f2fs_rwsem_is_locked(&sbi->cp_global_sem) || checkpoint_context) { down_read(&curseg->journal_rwsem); - } else if (rwsem_is_contended(&nm_i->nat_tree_lock) || + } else if (f2fs_rwsem_is_contended(&nm_i->nat_tree_lock) || !down_read_trylock(&curseg->journal_rwsem)) { - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); goto retry; } @@ -589,15 +589,15 @@ retry: ne = nat_in_journal(journal, i); node_info_from_raw_nat(ni, &ne); } - up_read(&curseg->journal_rwsem); + up_read(&curseg->journal_rwsem); if (i >= 0) { - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); goto cache; } /* Fill node_info from nat page */ index = current_nat_addr(sbi, nid); - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); page = f2fs_get_meta_page(sbi, index); if (IS_ERR(page)) @@ -1609,17 +1609,17 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, goto redirty_out; if (wbc->for_reclaim) { - if (!down_read_trylock(&sbi->node_write)) + if (!f2fs_down_read_trylock(&sbi->node_write)) goto redirty_out; } else { - down_read(&sbi->node_write); + f2fs_down_read(&sbi->node_write); } /* This page is already truncated */ if (unlikely(ni.blk_addr == NULL_ADDR)) { ClearPageUptodate(page); dec_page_count(sbi, F2FS_DIRTY_NODES); - up_read(&sbi->node_write); + f2fs_up_read(&sbi->node_write); unlock_page(page); return 0; } @@ -1627,7 +1627,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, if (__is_valid_data_blkaddr(ni.blk_addr) && !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC_ENHANCE)) { - up_read(&sbi->node_write); + f2fs_up_read(&sbi->node_write); goto redirty_out; } @@ -1648,7 +1648,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, f2fs_do_write_node_page(nid, &fio); set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page)); dec_page_count(sbi, F2FS_DIRTY_NODES); - 
up_read(&sbi->node_write); + f2fs_up_read(&sbi->node_write); if (wbc->for_reclaim) { f2fs_submit_merged_write_cond(sbi, NULL, page, 0, NODE); @@ -2225,14 +2225,14 @@ bool f2fs_nat_bitmap_enabled(struct f2fs_sb_info *sbi) unsigned int i; bool ret = true; - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); for (i = 0; i < nm_i->nat_blocks; i++) { if (!test_bit_le(i, nm_i->nat_block_bitmap)) { ret = false; break; } } - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); return ret; } @@ -2415,7 +2415,7 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi) unsigned int i, idx; nid_t nid; - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); for (i = 0; i < nm_i->nat_blocks; i++) { if (!test_bit_le(i, nm_i->nat_block_bitmap)) @@ -2438,7 +2438,7 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi) out: scan_curseg_cache(sbi); - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); } static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, @@ -2473,7 +2473,7 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT, true); - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); while (1) { if (!test_bit_le(NAT_BLOCK_OFFSET(nid), @@ -2488,7 +2488,7 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, } if (ret) { - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); f2fs_err(sbi, "NAT is corrupt, run fsck to fix it"); return ret; } @@ -2508,7 +2508,7 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, /* find free nids from current sum_pages */ scan_curseg_cache(sbi); - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid), nm_i->ra_nid_pages, META_NAT, false); @@ -2953,7 +2953,7 @@ void f2fs_enable_nat_bits(struct f2fs_sb_info *sbi) struct f2fs_nm_info *nm_i = NM_I(sbi); 
unsigned int nat_ofs; - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); for (nat_ofs = 0; nat_ofs < nm_i->nat_blocks; nat_ofs++) { unsigned int valid = 0, nid_ofs = 0; @@ -2973,7 +2973,7 @@ void f2fs_enable_nat_bits(struct f2fs_sb_info *sbi) __update_nat_bits(nm_i, nat_ofs, valid); } - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); } static int __flush_nat_entry_set(struct f2fs_sb_info *sbi, @@ -3071,15 +3071,15 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) * nat_cnt[DIRTY_NAT]. */ if (cpc->reason & CP_UMOUNT) { - down_write(&nm_i->nat_tree_lock); + f2fs_down_write(&nm_i->nat_tree_lock); remove_nats_in_journal(sbi); - up_write(&nm_i->nat_tree_lock); + f2fs_up_write(&nm_i->nat_tree_lock); } if (!nm_i->nat_cnt[DIRTY_NAT]) return 0; - down_write(&nm_i->nat_tree_lock); + f2fs_down_write(&nm_i->nat_tree_lock); /* * if there are no enough space in journal to store dirty nat @@ -3108,7 +3108,7 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) break; } - up_write(&nm_i->nat_tree_lock); + f2fs_up_write(&nm_i->nat_tree_lock); /* Allow dirty nats by node block allocation in write_begin */ return err; @@ -3228,7 +3228,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi) mutex_init(&nm_i->build_lock); spin_lock_init(&nm_i->nid_list_lock); - init_rwsem(&nm_i->nat_tree_lock); + init_f2fs_rwsem(&nm_i->nat_tree_lock); nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); @@ -3334,7 +3334,7 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi) spin_unlock(&nm_i->nid_list_lock); /* destroy nat cache */ - down_write(&nm_i->nat_tree_lock); + f2fs_down_write(&nm_i->nat_tree_lock); while ((found = __gang_lookup_nat_cache(nm_i, nid, NATVEC_SIZE, natvec))) { unsigned idx; @@ -3364,7 +3364,7 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi) kmem_cache_free(nat_entry_set_slab, setvec[idx]); } } - 
up_write(&nm_i->nat_tree_lock); + f2fs_up_write(&nm_i->nat_tree_lock); kvfree(nm_i->nat_block_bitmap); if (nm_i->free_nid_bitmap) { diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 9683c80ff8c24..10d152cfa58d1 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -796,7 +796,7 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) INIT_LIST_HEAD(&dir_list); /* prevent checkpoint */ - down_write(&sbi->cp_global_sem); + f2fs_down_write(&sbi->cp_global_sem); /* step #1: find fsynced inode numbers */ err = find_fsync_dnodes(sbi, &inode_list, check_only); @@ -845,7 +845,7 @@ skip: if (!err) clear_sbi_flag(sbi, SBI_POR_DOING); - up_write(&sbi->cp_global_sem); + f2fs_up_write(&sbi->cp_global_sem); /* let's drop all the directory inodes for clean checkpoint */ destroy_fsync_dnodes(&dir_list, err); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1dabc8244083d..216538b573310 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -471,7 +471,7 @@ int f2fs_commit_inmem_pages(struct inode *inode) f2fs_balance_fs(sbi, true); - down_write(&fi->i_gc_rwsem[WRITE]); + f2fs_down_write(&fi->i_gc_rwsem[WRITE]); f2fs_lock_op(sbi); set_inode_flag(inode, FI_ATOMIC_COMMIT); @@ -483,7 +483,7 @@ int f2fs_commit_inmem_pages(struct inode *inode) clear_inode_flag(inode, FI_ATOMIC_COMMIT); f2fs_unlock_op(sbi); - up_write(&fi->i_gc_rwsem[WRITE]); + f2fs_up_write(&fi->i_gc_rwsem[WRITE]); return err; } @@ -521,7 +521,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) io_schedule(); finish_wait(&sbi->gc_thread->fggc_wq, &wait); } else { - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); f2fs_gc(sbi, false, false, false, NULL_SEGNO); } } @@ -529,7 +529,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) static inline bool excess_dirty_threshold(struct f2fs_sb_info *sbi) { - int factor = rwsem_is_locked(&sbi->cp_rwsem) ? 3 : 2; + int factor = f2fs_rwsem_is_locked(&sbi->cp_rwsem) ? 
3 : 2; unsigned int dents = get_pages(sbi, F2FS_DIRTY_DENTS); unsigned int qdata = get_pages(sbi, F2FS_DIRTY_QDATA); unsigned int nodes = get_pages(sbi, F2FS_DIRTY_NODES); @@ -570,7 +570,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg) /* there is background inflight IO or foreground operation recently */ if (is_inflight_io(sbi, REQ_TIME) || - (!f2fs_time_over(sbi, REQ_TIME) && rwsem_is_locked(&sbi->cp_rwsem))) + (!f2fs_time_over(sbi, REQ_TIME) && f2fs_rwsem_is_locked(&sbi->cp_rwsem))) return; /* exceed periodical checkpoint timeout threshold */ @@ -2821,7 +2821,7 @@ static void __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi) if (!sbi->am.atgc_enabled) return; - down_read(&SM_I(sbi)->curseg_lock); + f2fs_down_read(&SM_I(sbi)->curseg_lock); mutex_lock(&curseg->curseg_mutex); down_write(&SIT_I(sbi)->sentry_lock); @@ -2831,7 +2831,7 @@ static void __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi) up_write(&SIT_I(sbi)->sentry_lock); mutex_unlock(&curseg->curseg_mutex); - up_read(&SM_I(sbi)->curseg_lock); + f2fs_up_read(&SM_I(sbi)->curseg_lock); } void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi) @@ -2982,7 +2982,7 @@ void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, struct curseg_info *curseg = CURSEG_I(sbi, type); unsigned int segno; - down_read(&SM_I(sbi)->curseg_lock); + f2fs_down_read(&SM_I(sbi)->curseg_lock); mutex_lock(&curseg->curseg_mutex); down_write(&SIT_I(sbi)->sentry_lock); @@ -3006,7 +3006,7 @@ unlock: type, segno, curseg->segno); mutex_unlock(&curseg->curseg_mutex); - up_read(&SM_I(sbi)->curseg_lock); + f2fs_up_read(&SM_I(sbi)->curseg_lock); } static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type, @@ -3038,23 +3038,23 @@ static void __allocate_new_section(struct f2fs_sb_info *sbi, void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force) { - down_read(&SM_I(sbi)->curseg_lock); + f2fs_down_read(&SM_I(sbi)->curseg_lock); down_write(&SIT_I(sbi)->sentry_lock); 
__allocate_new_section(sbi, type, force); up_write(&SIT_I(sbi)->sentry_lock); - up_read(&SM_I(sbi)->curseg_lock); + f2fs_up_read(&SM_I(sbi)->curseg_lock); } void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) { int i; - down_read(&SM_I(sbi)->curseg_lock); + f2fs_down_read(&SM_I(sbi)->curseg_lock); down_write(&SIT_I(sbi)->sentry_lock); for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) __allocate_new_segment(sbi, i, false, false); up_write(&SIT_I(sbi)->sentry_lock); - up_read(&SM_I(sbi)->curseg_lock); + f2fs_up_read(&SM_I(sbi)->curseg_lock); } static const struct segment_allocation default_salloc_ops = { @@ -3192,9 +3192,9 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) if (sbi->discard_blks == 0) goto out; - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); err = f2fs_write_checkpoint(sbi, &cpc); - up_write(&sbi->gc_lock); + f2fs_up_write(&sbi->gc_lock); if (err) goto out; @@ -3431,7 +3431,7 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, bool from_gc = (type == CURSEG_ALL_DATA_ATGC); struct seg_entry *se = NULL; - down_read(&SM_I(sbi)->curseg_lock); + f2fs_down_read(&SM_I(sbi)->curseg_lock); mutex_lock(&curseg->curseg_mutex); down_write(&sit_i->sentry_lock); @@ -3514,7 +3514,7 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, mutex_unlock(&curseg->curseg_mutex); - up_read(&SM_I(sbi)->curseg_lock); + f2fs_up_read(&SM_I(sbi)->curseg_lock); } void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino, @@ -3550,7 +3550,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) bool keep_order = (f2fs_lfs_mode(fio->sbi) && type == CURSEG_COLD_DATA); if (keep_order) - down_read(&fio->sbi->io_order_lock); + f2fs_down_read(&fio->sbi->io_order_lock); reallocate: f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, &fio->new_blkaddr, sum, type, fio); @@ -3570,7 +3570,7 @@ reallocate: f2fs_update_device_state(fio->sbi, fio->ino, 
fio->new_blkaddr, 1); if (keep_order) - up_read(&fio->sbi->io_order_lock); + f2fs_up_read(&fio->sbi->io_order_lock); } void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page, @@ -3705,7 +3705,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, se = get_seg_entry(sbi, segno); type = se->type; - down_write(&SM_I(sbi)->curseg_lock); + f2fs_down_write(&SM_I(sbi)->curseg_lock); if (!recover_curseg) { /* for recovery flow */ @@ -3774,7 +3774,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, up_write(&sit_i->sentry_lock); mutex_unlock(&curseg->curseg_mutex); - up_write(&SM_I(sbi)->curseg_lock); + f2fs_up_write(&SM_I(sbi)->curseg_lock); } void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, @@ -5258,7 +5258,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&sm_info->sit_entry_set); - init_rwsem(&sm_info->curseg_lock); + init_f2fs_rwsem(&sm_info->curseg_lock); if (!f2fs_readonly(sbi->sb)) { err = f2fs_create_flush_cmd_control(sbi); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 76e6a3df9abac..9af6c20532ece 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1355,16 +1355,16 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) /* Initialize f2fs-specific inode info */ atomic_set(&fi->dirty_pages, 0); atomic_set(&fi->i_compr_blocks, 0); - init_rwsem(&fi->i_sem); + init_f2fs_rwsem(&fi->i_sem); spin_lock_init(&fi->i_size_lock); INIT_LIST_HEAD(&fi->dirty_list); INIT_LIST_HEAD(&fi->gdirty_list); INIT_LIST_HEAD(&fi->inmem_ilist); INIT_LIST_HEAD(&fi->inmem_pages); mutex_init(&fi->inmem_lock); - init_rwsem(&fi->i_gc_rwsem[READ]); - init_rwsem(&fi->i_gc_rwsem[WRITE]); - init_rwsem(&fi->i_xattr_sem); + init_f2fs_rwsem(&fi->i_gc_rwsem[READ]); + init_f2fs_rwsem(&fi->i_gc_rwsem[WRITE]); + init_f2fs_rwsem(&fi->i_xattr_sem); /* Will be used by directory only */ fi->i_dir_level = F2FS_SB(sb)->dir_level; @@ -2088,7 +2088,7 @@ static int 
f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) f2fs_update_time(sbi, DISABLE_TIME); while (!f2fs_time_over(sbi, DISABLE_TIME)) { - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); err = f2fs_gc(sbi, true, false, false, NULL_SEGNO); if (err == -ENODATA) { err = 0; @@ -2110,7 +2110,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) goto restore_flag; } - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); cpc.reason = CP_PAUSE; set_sbi_flag(sbi, SBI_CP_DISABLED); err = f2fs_write_checkpoint(sbi, &cpc); @@ -2122,7 +2122,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) spin_unlock(&sbi->stat_lock); out_unlock: - up_write(&sbi->gc_lock); + f2fs_up_write(&sbi->gc_lock); restore_flag: sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */ return err; @@ -2142,12 +2142,12 @@ static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi) if (unlikely(retry < 0)) f2fs_warn(sbi, "checkpoint=enable has some unwritten data."); - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); f2fs_dirty_to_prefree(sbi); clear_sbi_flag(sbi, SBI_CP_DISABLED); set_sbi_flag(sbi, SBI_IS_DIRTY); - up_write(&sbi->gc_lock); + f2fs_up_write(&sbi->gc_lock); f2fs_sync_fs(sbi->sb, 1); } @@ -2707,18 +2707,18 @@ int f2fs_quota_sync(struct super_block *sb, int type) /* * do_quotactl * f2fs_quota_sync - * down_read(quota_sem) + * f2fs_down_read(quota_sem) * dquot_writeback_dquots() * f2fs_dquot_commit * block_operation - * down_read(quota_sem) + * f2fs_down_read(quota_sem) */ f2fs_lock_op(sbi); - down_read(&sbi->quota_sem); + f2fs_down_read(&sbi->quota_sem); ret = f2fs_quota_sync_file(sbi, cnt); - up_read(&sbi->quota_sem); + f2fs_up_read(&sbi->quota_sem); f2fs_unlock_op(sbi); inode_unlock(dqopt->files[cnt]); @@ -2843,11 +2843,11 @@ static int f2fs_dquot_commit(struct dquot *dquot) struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb); int ret; - down_read_nested(&sbi->quota_sem, SINGLE_DEPTH_NESTING); + f2fs_down_read_nested(&sbi->quota_sem, 
SINGLE_DEPTH_NESTING); ret = dquot_commit(dquot); if (ret < 0) set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); - up_read(&sbi->quota_sem); + f2fs_up_read(&sbi->quota_sem); return ret; } @@ -2856,11 +2856,11 @@ static int f2fs_dquot_acquire(struct dquot *dquot) struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb); int ret; - down_read(&sbi->quota_sem); + f2fs_down_read(&sbi->quota_sem); ret = dquot_acquire(dquot); if (ret < 0) set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); - up_read(&sbi->quota_sem); + f2fs_up_read(&sbi->quota_sem); return ret; } @@ -3601,14 +3601,14 @@ static void init_sb_info(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&sbi->s_list); mutex_init(&sbi->umount_mutex); - init_rwsem(&sbi->io_order_lock); + init_f2fs_rwsem(&sbi->io_order_lock); spin_lock_init(&sbi->cp_lock); sbi->dirty_device = 0; spin_lock_init(&sbi->dev_lock); - init_rwsem(&sbi->sb_lock); - init_rwsem(&sbi->pin_sem); + init_f2fs_rwsem(&sbi->sb_lock); + init_f2fs_rwsem(&sbi->pin_sem); } static int init_percpu_info(struct f2fs_sb_info *sbi) @@ -4067,11 +4067,11 @@ try_onemore: /* init f2fs-specific super block info */ sbi->valid_super_block = valid_super_block; - init_rwsem(&sbi->gc_lock); + init_f2fs_rwsem(&sbi->gc_lock); mutex_init(&sbi->writepages); - init_rwsem(&sbi->cp_global_sem); - init_rwsem(&sbi->node_write); - init_rwsem(&sbi->node_change); + init_f2fs_rwsem(&sbi->cp_global_sem); + init_f2fs_rwsem(&sbi->node_write); + init_f2fs_rwsem(&sbi->node_change); /* disallow all the data/node/meta page writes */ set_sbi_flag(sbi, SBI_POR_DOING); @@ -4092,18 +4092,18 @@ try_onemore: } for (j = HOT; j < n; j++) { - init_rwsem(&sbi->write_io[i][j].io_rwsem); + init_f2fs_rwsem(&sbi->write_io[i][j].io_rwsem); sbi->write_io[i][j].sbi = sbi; sbi->write_io[i][j].bio = NULL; spin_lock_init(&sbi->write_io[i][j].io_lock); INIT_LIST_HEAD(&sbi->write_io[i][j].io_list); INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list); - init_rwsem(&sbi->write_io[i][j].bio_list_lock); + 
init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock); } } - init_rwsem(&sbi->cp_rwsem); - init_rwsem(&sbi->quota_sem); + init_f2fs_rwsem(&sbi->cp_rwsem); + init_f2fs_rwsem(&sbi->quota_sem); init_waitqueue_head(&sbi->cp_wait); init_sb_info(sbi); diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index df406c16b2ebd..ce70e798d0d45 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -363,7 +363,7 @@ static ssize_t __sbi_store(struct f2fs_attr *a, if (!strlen(name) || strlen(name) >= F2FS_EXTENSION_LEN) return -EINVAL; - down_write(&sbi->sb_lock); + f2fs_down_write(&sbi->sb_lock); ret = f2fs_update_extension_list(sbi, name, hot, set); if (ret) @@ -373,7 +373,7 @@ static ssize_t __sbi_store(struct f2fs_attr *a, if (ret) f2fs_update_extension_list(sbi, name, hot, !set); out: - up_write(&sbi->sb_lock); + f2fs_up_write(&sbi->sb_lock); return ret ? ret : count; } diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c index fe5acdccaae19..3d793202cc9fe 100644 --- a/fs/f2fs/verity.c +++ b/fs/f2fs/verity.c @@ -208,7 +208,7 @@ cleanup: * from re-instantiating cached pages we are truncating (since unlike * normal file accesses, garbage collection isn't limited by i_size). 
*/ - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); truncate_inode_pages(inode->i_mapping, inode->i_size); err2 = f2fs_truncate(inode); if (err2) { @@ -216,7 +216,7 @@ cleanup: err2); set_sbi_flag(sbi, SBI_NEED_FSCK); } - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); clear_inode_flag(inode, FI_VERITY_IN_PROGRESS); return err ?: err2; } diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 8e5cd9c916ff1..c76c15086e5f5 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -525,10 +525,10 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, if (len > F2FS_NAME_LEN) return -ERANGE; - down_read(&F2FS_I(inode)->i_xattr_sem); + f2fs_down_read(&F2FS_I(inode)->i_xattr_sem); error = lookup_all_xattrs(inode, ipage, index, len, name, &entry, &base_addr, &base_size, &is_inline); - up_read(&F2FS_I(inode)->i_xattr_sem); + f2fs_up_read(&F2FS_I(inode)->i_xattr_sem); if (error) return error; @@ -562,9 +562,9 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) int error; size_t rest = buffer_size; - down_read(&F2FS_I(inode)->i_xattr_sem); + f2fs_down_read(&F2FS_I(inode)->i_xattr_sem); error = read_all_xattrs(inode, NULL, &base_addr); - up_read(&F2FS_I(inode)->i_xattr_sem); + f2fs_up_read(&F2FS_I(inode)->i_xattr_sem); if (error) return error; @@ -786,9 +786,9 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name, f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); - down_write(&F2FS_I(inode)->i_xattr_sem); + f2fs_down_write(&F2FS_I(inode)->i_xattr_sem); err = __f2fs_setxattr(inode, index, name, value, size, ipage, flags); - up_write(&F2FS_I(inode)->i_xattr_sem); + f2fs_up_write(&F2FS_I(inode)->i_xattr_sem); f2fs_unlock_op(sbi); f2fs_update_time(sbi, REQ_TIME); -- GitLab From 7d19e3dab0002e527052b0aaf986e8c32e5537bf Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 18 Jan 2022 11:48:02 +0800 Subject: [PATCH 0026/1586] f2fs: 
fix to enable ATGC correctly via gc_idle sysfs interface It needs to assign sbi->gc_mode with GC_IDLE_AT rather than GC_AT when user tries to enable ATGC via gc_idle sysfs interface, fix it. Fixes: 093749e296e2 ("f2fs: support age threshold based garbage collection") Cc: Zhipeng Tan Signed-off-by: Jicheng Shao Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index ce70e798d0d45..2bccdaedfb000 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -481,7 +481,7 @@ out: } else if (t == GC_IDLE_AT) { if (!sbi->am.atgc_enabled) return -EINVAL; - sbi->gc_mode = GC_AT; + sbi->gc_mode = GC_IDLE_AT; } else { sbi->gc_mode = GC_NORMAL; } -- GitLab From f62ca4e2a863033d9b3b5a00a0d897557c9da6c5 Mon Sep 17 00:00:00 2001 From: Li-hao Kuo Date: Tue, 18 Jan 2022 16:42:38 +0800 Subject: [PATCH 0027/1586] spi: Add spi driver for Sunplus SP7021 Add spi driver for Sunplus SP7021. Signed-off-by: Li-hao Kuo Link: https://lore.kernel.org/r/37998e515d561e762ee30d0ac4fca25a948e0c5c.1642494310.git.lhjeff911@gmail.com Signed-off-by: Mark Brown --- MAINTAINERS | 6 + drivers/spi/Kconfig | 11 + drivers/spi/Makefile | 1 + drivers/spi/spi-sunplus-sp7021.c | 602 +++++++++++++++++++++++++++++++ 4 files changed, 620 insertions(+) create mode 100644 drivers/spi/spi-sunplus-sp7021.c diff --git a/MAINTAINERS b/MAINTAINERS index ea3e6c9143848..a517966bc7e88 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18491,6 +18491,12 @@ S: Maintained F: Documentation/devicetree/bindings/rtc/sunplus,sp7021-rtc.yaml F: drivers/rtc/rtc-sunplus.c +SUNPLUS SPI CONTROLLER INTERFACE DRIVER +M: Li-hao Kuo +L: linux-spi@vger.kernel.org +S: Maintained +F: drivers/spi/spi-sunplus-sp7021.c + SUPERH M: Yoshinori Sato M: Rich Felker diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index b2a8821971e1d..203f4ec321190 100644 --- a/drivers/spi/Kconfig +++ b/drivers/spi/Kconfig @@ -866,6 +866,17 @@ config 
SPI_SUN6I help This enables using the SPI controller on the Allwinner A31 SoCs. +config SPI_SUNPLUS_SP7021 + tristate "Sunplus SP7021 SPI controller" + depends on SOC_SP7021 || COMPILE_TEST + help + This enables Sunplus SP7021 SPI controller driver on the SP7021 SoCs. + This driver can also be built as a module. If so, the module will be + called as spi-sunplus-sp7021. + + If you have a Sunplus SP7021 platform say Y here. + If unsure, say N. + config SPI_SYNQUACER tristate "Socionext's SynQuacer HighSpeed SPI controller" depends on ARCH_SYNQUACER || COMPILE_TEST diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile index dd7393a6046fa..b455eafd53677 100644 --- a/drivers/spi/Makefile +++ b/drivers/spi/Makefile @@ -119,6 +119,7 @@ obj-$(CONFIG_SPI_STM32_QSPI) += spi-stm32-qspi.o obj-$(CONFIG_SPI_ST_SSC4) += spi-st-ssc4.o obj-$(CONFIG_SPI_SUN4I) += spi-sun4i.o obj-$(CONFIG_SPI_SUN6I) += spi-sun6i.o +obj-$(CONFIG_SPI_SUNPLUS_SP7021) += spi-sunplus-sp7021.o obj-$(CONFIG_SPI_SYNQUACER) += spi-synquacer.o obj-$(CONFIG_SPI_TEGRA210_QUAD) += spi-tegra210-quad.o obj-$(CONFIG_SPI_TEGRA114) += spi-tegra114.o diff --git a/drivers/spi/spi-sunplus-sp7021.c b/drivers/spi/spi-sunplus-sp7021.c new file mode 100644 index 0000000000000..627b9c3024e96 --- /dev/null +++ b/drivers/spi/spi-sunplus-sp7021.c @@ -0,0 +1,602 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (c) 2021 Sunplus Inc. 
+// Author: Li-hao Kuo + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define SP7021_DATA_RDY_REG 0x0044 +#define SP7021_SLAVE_DMA_CTRL_REG 0x0048 +#define SP7021_SLAVE_DMA_LENGTH_REG 0x004c +#define SP7021_SLAVE_DMA_ADDR_REG 0x004c + +#define SP7021_SLAVE_DATA_RDY BIT(0) +#define SP7021_SLAVE_SW_RST BIT(1) +#define SP7021_SLA_DMA_W_INT BIT(8) +#define SP7021_SLAVE_CLR_INT BIT(8) +#define SP7021_SLAVE_DMA_EN BIT(0) +#define SP7021_SLAVE_DMA_RW BIT(6) +#define SP7021_SLAVE_DMA_CMD GENMASK(3, 2) + +#define SP7021_FIFO_REG 0x0034 +#define SP7021_SPI_STATUS_REG 0x0038 +#define SP7021_SPI_CONFIG_REG 0x003c +#define SP7021_INT_BUSY_REG 0x004c +#define SP7021_DMA_CTRL_REG 0x0050 + +#define SP7021_SPI_START_FD BIT(0) +#define SP7021_FD_SW_RST BIT(1) +#define SP7021_TX_EMP_FLAG BIT(2) +#define SP7021_RX_EMP_FLAG BIT(4) +#define SP7021_RX_FULL_FLAG BIT(5) +#define SP7021_FINISH_FLAG BIT(6) + +#define SP7021_TX_CNT_MASK GENMASK(11, 8) +#define SP7021_RX_CNT_MASK GENMASK(15, 12) +#define SP7021_TX_LEN_MASK GENMASK(23, 16) +#define SP7021_GET_LEN_MASK GENMASK(31, 24) +#define SP7021_SET_TX_LEN GENMASK(23, 16) +#define SP7021_SET_XFER_LEN GENMASK(31, 24) + +#define SP7021_CPOL_FD BIT(0) +#define SP7021_CPHA_R BIT(1) +#define SP7021_CPHA_W BIT(2) +#define SP7021_LSB_SEL BIT(4) +#define SP7021_CS_POR BIT(5) +#define SP7021_FD_SEL BIT(6) + +#define SP7021_RX_UNIT GENMASK(8, 7) +#define SP7021_TX_UNIT GENMASK(10, 9) +#define SP7021_TX_EMP_FLAG_MASK BIT(11) +#define SP7021_RX_FULL_FLAG_MASK BIT(14) +#define SP7021_FINISH_FLAG_MASK BIT(15) +#define SP7021_CLEAN_RW_BYTE GENMASK(10, 7) +#define SP7021_CLEAN_FLUG_MASK GENMASK(15, 11) +#define SP7021_CLK_MASK GENMASK(31, 16) + +#define SP7021_INT_BYPASS BIT(3) +#define SP7021_CLR_MASTER_INT BIT(6) + +#define SP7021_SPI_DATA_SIZE (255) +#define SP7021_FIFO_DATA_LEN (16) + +enum SP_SPI_MODE { + SP7021_SLAVE_READ = 0, + SP7021_SLAVE_WRITE = 1, + SP7021_SPI_IDLE = 2, +}; + 
+enum { + SP7021_MASTER_MODE = 0, + SP7021_SLAVE_MODE = 1, +}; + +struct sp7021_spi_ctlr { + struct device *dev; + struct spi_controller *ctlr; + void __iomem *m_base; + void __iomem *s_base; + u32 xfer_conf; + int mode; + int m_irq; + int s_irq; + struct clk *spi_clk; + struct reset_control *rstc; + // irq spin lock + spinlock_t lock; + // data xfer lock + struct mutex buf_lock; + struct completion isr_done; + struct completion slave_isr; + unsigned int rx_cur_len; + unsigned int tx_cur_len; + unsigned int data_unit; + const u8 *tx_buf; + u8 *rx_buf; +}; + +static irqreturn_t sp7021_spi_slave_irq(int irq, void *dev) +{ + struct sp7021_spi_ctlr *pspim = dev; + unsigned int data_status; + + data_status = readl(pspim->s_base + SP7021_DATA_RDY_REG); + writel(data_status | SP7021_SLAVE_CLR_INT, pspim->s_base + SP7021_DATA_RDY_REG); + complete(&pspim->slave_isr); + return IRQ_HANDLED; +} + +static int sp7021_spi_slave_abort(struct spi_controller *ctlr) +{ + struct sp7021_spi_ctlr *pspim = spi_master_get_devdata(ctlr); + + complete(&pspim->slave_isr); + complete(&pspim->isr_done); + return 0; +} + +int sp7021_spi_slave_tx(struct spi_device *spi, struct spi_transfer *xfer) +{ + struct sp7021_spi_ctlr *pspim = spi_controller_get_devdata(spi->controller); + + reinit_completion(&pspim->slave_isr); + writel(SP7021_SLAVE_DMA_EN | SP7021_SLAVE_DMA_RW | FIELD_PREP(SP7021_SLAVE_DMA_CMD, 3), + pspim->s_base + SP7021_SLAVE_DMA_CTRL_REG); + writel(xfer->len, pspim->s_base + SP7021_SLAVE_DMA_LENGTH_REG); + writel(xfer->tx_dma, pspim->s_base + SP7021_SLAVE_DMA_ADDR_REG); + writel(readl(pspim->s_base + SP7021_DATA_RDY_REG) | SP7021_SLAVE_DATA_RDY, + pspim->s_base + SP7021_DATA_RDY_REG); + if (wait_for_completion_interruptible(&pspim->isr_done)) { + dev_err(&spi->dev, "%s() wait_for_completion err\n", __func__); + return -EINTR; + } + return 0; +} + +int sp7021_spi_slave_rx(struct spi_device *spi, struct spi_transfer *xfer) +{ + struct sp7021_spi_ctlr *pspim = 
spi_controller_get_devdata(spi->controller); + int ret = 0; + + reinit_completion(&pspim->isr_done); + writel(SP7021_SLAVE_DMA_EN | FIELD_PREP(SP7021_SLAVE_DMA_CMD, 3), + pspim->s_base + SP7021_SLAVE_DMA_CTRL_REG); + writel(xfer->len, pspim->s_base + SP7021_SLAVE_DMA_LENGTH_REG); + writel(xfer->rx_dma, pspim->s_base + SP7021_SLAVE_DMA_ADDR_REG); + if (wait_for_completion_interruptible(&pspim->isr_done)) { + dev_err(&spi->dev, "%s() wait_for_completion err\n", __func__); + return -EINTR; + } + writel(SP7021_SLAVE_SW_RST, pspim->s_base + SP7021_SLAVE_DMA_CTRL_REG); + return ret; +} + +void sp7021_spi_master_rb(struct sp7021_spi_ctlr *pspim, unsigned int len) +{ + int i; + + for (i = 0; i < len; i++) { + pspim->rx_buf[pspim->rx_cur_len] = + readl(pspim->m_base + SP7021_FIFO_REG); + pspim->rx_cur_len++; + } +} + +void sp7021_spi_master_wb(struct sp7021_spi_ctlr *pspim, unsigned int len) +{ + int i; + + for (i = 0; i < len; i++) { + writel(pspim->tx_buf[pspim->tx_cur_len], + pspim->m_base + SP7021_FIFO_REG); + pspim->tx_cur_len++; + } +} + +static irqreturn_t sp7021_spi_master_irq(int irq, void *dev) +{ + struct sp7021_spi_ctlr *pspim = dev; + unsigned int tx_cnt, total_len; + unsigned int tx_len, rx_cnt; + unsigned int fd_status; + unsigned long flags; + bool isrdone = false; + u32 value; + + fd_status = readl(pspim->m_base + SP7021_SPI_STATUS_REG); + tx_cnt = FIELD_GET(SP7021_TX_CNT_MASK, fd_status); + tx_len = FIELD_GET(SP7021_TX_LEN_MASK, fd_status); + total_len = FIELD_GET(SP7021_GET_LEN_MASK, fd_status); + + if ((fd_status & SP7021_TX_EMP_FLAG) && (fd_status & SP7021_RX_EMP_FLAG) && total_len == 0) + return IRQ_NONE; + + if (tx_len == 0 && total_len == 0) + return IRQ_NONE; + + spin_lock_irqsave(&pspim->lock, flags); + + rx_cnt = FIELD_GET(SP7021_RX_CNT_MASK, fd_status); + if (fd_status & SP7021_RX_FULL_FLAG) + rx_cnt = pspim->data_unit; + + tx_cnt = min(tx_len - pspim->tx_cur_len, pspim->data_unit - tx_cnt); + dev_dbg(pspim->dev, "fd_st=0x%x rx_c:%d tx_c:%d 
tx_l:%d", + fd_status, rx_cnt, tx_cnt, tx_len); + + if (rx_cnt > 0) + sp7021_spi_master_rb(pspim, rx_cnt); + if (tx_cnt > 0) + sp7021_spi_master_wb(pspim, tx_cnt); + + fd_status = readl(pspim->m_base + SP7021_SPI_STATUS_REG); + tx_len = FIELD_GET(SP7021_TX_LEN_MASK, fd_status); + total_len = FIELD_GET(SP7021_GET_LEN_MASK, fd_status); + + if (fd_status & SP7021_FINISH_FLAG || tx_len == pspim->tx_cur_len) { + while (total_len != pspim->rx_cur_len) { + fd_status = readl(pspim->m_base + SP7021_SPI_STATUS_REG); + total_len = FIELD_GET(SP7021_GET_LEN_MASK, fd_status); + if (fd_status & SP7021_RX_FULL_FLAG) + rx_cnt = pspim->data_unit; + else + rx_cnt = FIELD_GET(SP7021_RX_CNT_MASK, fd_status); + + if (rx_cnt > 0) + sp7021_spi_master_rb(pspim, rx_cnt); + } + value = readl(pspim->m_base + SP7021_INT_BUSY_REG); + value |= SP7021_CLR_MASTER_INT; + writel(value, pspim->m_base + SP7021_INT_BUSY_REG); + writel(SP7021_FINISH_FLAG, pspim->m_base + SP7021_SPI_STATUS_REG); + isrdone = true; + } + + if (isrdone) + complete(&pspim->isr_done); + spin_unlock_irqrestore(&pspim->lock, flags); + return IRQ_HANDLED; +} + +static void sp7021_prep_transfer(struct spi_controller *ctlr, struct spi_device *spi) +{ + struct sp7021_spi_ctlr *pspim = spi_master_get_devdata(ctlr); + + pspim->tx_cur_len = 0; + pspim->rx_cur_len = 0; + pspim->data_unit = SP7021_FIFO_DATA_LEN; +} + +// preliminary set CS, CPOL, CPHA and LSB +static int sp7021_spi_controller_prepare_message(struct spi_controller *ctlr, + struct spi_message *msg) +{ + struct sp7021_spi_ctlr *pspim = spi_master_get_devdata(ctlr); + struct spi_device *s = msg->spi; + u32 valus, rs = 0; + + valus = readl(pspim->m_base + SP7021_SPI_STATUS_REG); + valus |= SP7021_FD_SW_RST; + writel(valus, pspim->m_base + SP7021_SPI_STATUS_REG); + rs |= SP7021_FD_SEL; + if (s->mode & SPI_CPOL) + rs |= SP7021_CPOL_FD; + + if (s->mode & SPI_LSB_FIRST) + rs |= SP7021_LSB_SEL; + + if (s->mode & SPI_CS_HIGH) + rs |= SP7021_CS_POR; + + if (s->mode & SPI_CPHA) + rs 
|= SP7021_CPHA_R; + else + rs |= SP7021_CPHA_W; + + rs |= FIELD_PREP(SP7021_TX_UNIT, 0) | FIELD_PREP(SP7021_RX_UNIT, 0); + pspim->xfer_conf = rs; + if (pspim->xfer_conf & SP7021_CPOL_FD) + writel(pspim->xfer_conf, pspim->m_base + SP7021_SPI_CONFIG_REG); + + return 0; +} + +static void sp7021_spi_setup_clk(struct spi_controller *ctlr, struct spi_transfer *xfer) +{ + struct sp7021_spi_ctlr *pspim = spi_master_get_devdata(ctlr); + u32 clk_rate, clk_sel, div; + + clk_rate = clk_get_rate(pspim->spi_clk); + div = clk_rate / xfer->speed_hz; + if (div < 2) + div = 2; + clk_sel = (div / 2) - 1; + pspim->xfer_conf &= SP7021_CLK_MASK; + pspim->xfer_conf |= FIELD_PREP(SP7021_CLK_MASK, clk_sel); + writel(pspim->xfer_conf, pspim->m_base + SP7021_SPI_CONFIG_REG); +} + +static int sp7021_spi_master_transfer_one(struct spi_controller *ctlr, struct spi_device *spi, + struct spi_transfer *xfer) +{ + struct sp7021_spi_ctlr *pspim = spi_master_get_devdata(ctlr); + unsigned long timeout = msecs_to_jiffies(1000); + unsigned int xfer_cnt, xfer_len, last_len; + unsigned int i, len_temp; + u32 reg_temp; + int ret; + + xfer_cnt = xfer->len / SP7021_SPI_DATA_SIZE; + last_len = xfer->len % SP7021_SPI_DATA_SIZE; + + for (i = 0; i <= xfer_cnt; i++) { + mutex_lock(&pspim->buf_lock); + sp7021_prep_transfer(ctlr, spi); + sp7021_spi_setup_clk(ctlr, xfer); + reinit_completion(&pspim->isr_done); + + if (i == xfer_cnt) + xfer_len = last_len; + else + xfer_len = SP7021_SPI_DATA_SIZE; + + pspim->tx_buf = xfer->tx_buf + i * SP7021_SPI_DATA_SIZE; + pspim->rx_buf = xfer->rx_buf + i * SP7021_SPI_DATA_SIZE; + + if (pspim->tx_cur_len < xfer_len) { + len_temp = min(pspim->data_unit, xfer_len); + sp7021_spi_master_wb(pspim, len_temp); + } + reg_temp = readl(pspim->m_base + SP7021_SPI_CONFIG_REG); + reg_temp &= ~SP7021_CLEAN_RW_BYTE; + reg_temp &= ~SP7021_CLEAN_FLUG_MASK; + reg_temp |= SP7021_FD_SEL | SP7021_FINISH_FLAG_MASK | + SP7021_TX_EMP_FLAG_MASK | SP7021_RX_FULL_FLAG_MASK | + FIELD_PREP(SP7021_TX_UNIT, 0) 
| FIELD_PREP(SP7021_RX_UNIT, 0); + writel(reg_temp, pspim->m_base + SP7021_SPI_CONFIG_REG); + + reg_temp = FIELD_PREP(SP7021_SET_TX_LEN, xfer_len) | + FIELD_PREP(SP7021_SET_XFER_LEN, xfer_len) | + SP7021_SPI_START_FD; + writel(reg_temp, pspim->m_base + SP7021_SPI_STATUS_REG); + + if (!wait_for_completion_interruptible_timeout(&pspim->isr_done, timeout)) { + dev_err(&spi->dev, "wait_for_completion err\n"); + return -ETIMEDOUT; + } + + reg_temp = readl(pspim->m_base + SP7021_SPI_STATUS_REG); + if (reg_temp & SP7021_FINISH_FLAG) { + writel(SP7021_FINISH_FLAG, pspim->m_base + SP7021_SPI_STATUS_REG); + writel(readl(pspim->m_base + SP7021_SPI_CONFIG_REG) & + SP7021_CLEAN_FLUG_MASK, pspim->m_base + SP7021_SPI_CONFIG_REG); + } + + if (pspim->xfer_conf & SP7021_CPOL_FD) + writel(pspim->xfer_conf, pspim->m_base + SP7021_SPI_CONFIG_REG); + + mutex_unlock(&pspim->buf_lock); + ret = 0; + } + return ret; +} + +static int sp7021_spi_slave_transfer_one(struct spi_controller *ctlr, struct spi_device *spi, + struct spi_transfer *xfer) +{ + struct sp7021_spi_ctlr *pspim = spi_master_get_devdata(ctlr); + struct device *dev = pspim->dev; + int mode, ret = 0; + + mode = SP7021_SPI_IDLE; + if (xfer->tx_buf && xfer->rx_buf) { + dev_dbg(&ctlr->dev, "%s() wrong command\n", __func__); + ret = -EINVAL; + } else if (xfer->tx_buf) { + xfer->tx_dma = dma_map_single(dev, (void *)xfer->tx_buf, + xfer->len, DMA_TO_DEVICE); + if (dma_mapping_error(dev, xfer->tx_dma)) + return -ENOMEM; + mode = SP7021_SLAVE_WRITE; + } else if (xfer->rx_buf) { + xfer->rx_dma = dma_map_single(dev, xfer->rx_buf, xfer->len, + DMA_FROM_DEVICE); + if (dma_mapping_error(dev, xfer->rx_dma)) + return -ENOMEM; + mode = SP7021_SLAVE_READ; + } + + switch (mode) { + case SP7021_SLAVE_WRITE: + ret = sp7021_spi_slave_tx(spi, xfer); + break; + case SP7021_SLAVE_READ: + ret = sp7021_spi_slave_rx(spi, xfer); + break; + default: + break; + } + if (xfer->tx_buf) + dma_unmap_single(dev, xfer->tx_dma, xfer->len, DMA_TO_DEVICE); + if 
(xfer->rx_buf) + dma_unmap_single(dev, xfer->rx_dma, xfer->len, DMA_FROM_DEVICE); + + spi_finalize_current_transfer(ctlr); + return ret; +} + +static void sp7021_spi_disable_unprepare(void *data) +{ + clk_disable_unprepare(data); +} + +static void sp7021_spi_reset_control_assert(void *data) +{ + reset_control_assert(data); +} + +static int sp7021_spi_controller_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct sp7021_spi_ctlr *pspim; + struct spi_controller *ctlr; + int mode, ret; + + pdev->id = of_alias_get_id(pdev->dev.of_node, "sp_spi"); + + if (device_property_read_bool(dev, "spi-slave")) + mode = SP7021_SLAVE_MODE; + else + mode = SP7021_MASTER_MODE; + + if (mode == SP7021_SLAVE_MODE) + ctlr = devm_spi_alloc_slave(dev, sizeof(*pspim)); + else + ctlr = devm_spi_alloc_master(dev, sizeof(*pspim)); + if (!ctlr) + return -ENOMEM; + device_set_node(&ctlr->dev, pdev->dev.fwnode); + ctlr->bus_num = pdev->id; + ctlr->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LSB_FIRST; + ctlr->auto_runtime_pm = true; + ctlr->prepare_message = sp7021_spi_controller_prepare_message; + if (mode == SP7021_SLAVE_MODE) { + ctlr->transfer_one = sp7021_spi_slave_transfer_one; + ctlr->slave_abort = sp7021_spi_slave_abort; + ctlr->flags = SPI_CONTROLLER_HALF_DUPLEX; + } else { + ctlr->bits_per_word_mask = SPI_BPW_MASK(8); + ctlr->min_speed_hz = 40000; + ctlr->max_speed_hz = 25000000; + ctlr->use_gpio_descriptors = true; + ctlr->flags = SPI_CONTROLLER_MUST_RX | SPI_CONTROLLER_MUST_TX; + ctlr->transfer_one = sp7021_spi_master_transfer_one; + } + platform_set_drvdata(pdev, ctlr); + pspim = spi_controller_get_devdata(ctlr); + pspim->mode = mode; + pspim->ctlr = ctlr; + pspim->dev = dev; + spin_lock_init(&pspim->lock); + mutex_init(&pspim->buf_lock); + init_completion(&pspim->isr_done); + init_completion(&pspim->slave_isr); + + pspim->m_base = devm_platform_ioremap_resource_byname(pdev, "master"); + if (IS_ERR(pspim->m_base)) + return dev_err_probe(dev, 
PTR_ERR(pspim->m_base), "m_base get fail\n"); + + pspim->s_base = devm_platform_ioremap_resource_byname(pdev, "slave"); + if (IS_ERR(pspim->s_base)) + return dev_err_probe(dev, PTR_ERR(pspim->s_base), "s_base get fail\n"); + + pspim->m_irq = platform_get_irq_byname(pdev, "master_risc"); + if (pspim->m_irq < 0) + return pspim->m_irq; + + pspim->s_irq = platform_get_irq_byname(pdev, "slave_risc"); + if (pspim->s_irq < 0) + return pspim->s_irq; + + ret = devm_request_irq(dev, pspim->m_irq, sp7021_spi_master_irq, + IRQF_TRIGGER_RISING, pdev->name, pspim); + if (ret) + return ret; + + ret = devm_request_irq(dev, pspim->s_irq, sp7021_spi_slave_irq, + IRQF_TRIGGER_RISING, pdev->name, pspim); + if (ret) + return ret; + + pspim->spi_clk = devm_clk_get(dev, NULL); + if (IS_ERR(pspim->spi_clk)) + return dev_err_probe(dev, PTR_ERR(pspim->spi_clk), "clk get fail\n"); + + pspim->rstc = devm_reset_control_get_exclusive(dev, NULL); + if (IS_ERR(pspim->rstc)) + return dev_err_probe(dev, PTR_ERR(pspim->rstc), "rst get fail\n"); + + ret = clk_prepare_enable(pspim->spi_clk); + if (ret) + return dev_err_probe(dev, ret, "failed to enable clk\n"); + + ret = devm_add_action_or_reset(dev, sp7021_spi_disable_unprepare, pspim->spi_clk); + if (ret) + return ret; + + ret = reset_control_deassert(pspim->rstc); + if (ret) + return dev_err_probe(dev, ret, "failed to deassert reset\n"); + + ret = devm_add_action_or_reset(dev, sp7021_spi_reset_control_assert, pspim->rstc); + if (ret) + return ret; + + pm_runtime_enable(dev); + ret = spi_register_controller(ctlr); + if (ret) { + pm_runtime_disable(dev); + return dev_err_probe(dev, ret, "spi_register_master fail\n"); + } + return 0; +} + +static int sp7021_spi_controller_remove(struct platform_device *pdev) +{ + struct spi_controller *ctlr = dev_get_drvdata(&pdev->dev); + + spi_unregister_controller(ctlr); + pm_runtime_disable(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); + return 0; +} + +static int __maybe_unused 
sp7021_spi_controller_suspend(struct device *dev) +{ + struct spi_controller *ctlr = dev_get_drvdata(dev); + struct sp7021_spi_ctlr *pspim = spi_master_get_devdata(ctlr); + + return reset_control_assert(pspim->rstc); +} + +static int __maybe_unused sp7021_spi_controller_resume(struct device *dev) +{ + struct spi_controller *ctlr = dev_get_drvdata(dev); + struct sp7021_spi_ctlr *pspim = spi_master_get_devdata(ctlr); + + reset_control_deassert(pspim->rstc); + return clk_prepare_enable(pspim->spi_clk); +} + +static int sp7021_spi_runtime_suspend(struct device *dev) +{ + struct spi_controller *ctlr = dev_get_drvdata(dev); + struct sp7021_spi_ctlr *pspim = spi_master_get_devdata(ctlr); + + return reset_control_assert(pspim->rstc); +} + +static int sp7021_spi_runtime_resume(struct device *dev) +{ + struct spi_controller *ctlr = dev_get_drvdata(dev); + struct sp7021_spi_ctlr *pspim = spi_master_get_devdata(ctlr); + + return reset_control_deassert(pspim->rstc); +} + +static const struct dev_pm_ops sp7021_spi_pm_ops = { + SET_RUNTIME_PM_OPS(sp7021_spi_runtime_suspend, + sp7021_spi_runtime_resume, NULL) + SET_SYSTEM_SLEEP_PM_OPS(sp7021_spi_controller_suspend, + sp7021_spi_controller_resume) +}; + +static const struct of_device_id sp7021_spi_controller_ids[] = { + { .compatible = "sunplus,sp7021-spi" }, + {} +}; +MODULE_DEVICE_TABLE(of, sp7021_spi_controller_ids); + +static struct platform_driver sp7021_spi_controller_driver = { + .probe = sp7021_spi_controller_probe, + .remove = sp7021_spi_controller_remove, + .driver = { + .name = "sunplus,sp7021-spi-controller", + .of_match_table = sp7021_spi_controller_ids, + .pm = &sp7021_spi_pm_ops, + }, +}; +module_platform_driver(sp7021_spi_controller_driver); + +MODULE_AUTHOR("Li-hao Kuo "); +MODULE_DESCRIPTION("Sunplus SPI controller driver"); +MODULE_LICENSE("GPL"); -- GitLab From a708078eeb992799161e794d9c569cf4f725a5b0 Mon Sep 17 00:00:00 2001 From: Li-hao Kuo Date: Tue, 25 Jan 2022 12:14:29 +0000 Subject: [PATCH 0028/1586] spi: 
Add Sunplus SP7021 schema Add bindings for Sunplus SP7021 spi driver Signed-off-by: Li-hao Kuo Signed-off-by: Mark Brown --- .../bindings/spi/spi-sunplus-sp7021.yaml | 81 +++++++++++++++++++ MAINTAINERS | 1 + 2 files changed, 82 insertions(+) create mode 100644 Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml diff --git a/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml b/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml new file mode 100644 index 0000000000000..24382cdda6455 --- /dev/null +++ b/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml @@ -0,0 +1,81 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright (C) Sunplus Co., Ltd. 2021
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/spi-sunplus-sp7021.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Sunplus sp7021 SPI controller
+
+allOf:
+  - $ref: "spi-controller.yaml"
+
+maintainers:
+  - Li-hao Kuo 
+
+properties:
+  compatible:
+    enum:
+      - sunplus,sp7021-spi
+
+  reg:
+    items:
+      - description: the SPI master registers
+      - description: the SPI slave registers
+
+  reg-names:
+    items:
+      - const: master
+      - const: slave
+
+  interrupt-names:
+    items:
+      - const: dma_w
+      - const: master_risc
+      - const: slave_risc
+
+  interrupts:
+    minItems: 3
+
+  clocks:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+# No clock name is required: the driver requests its clock without a name.
+required:
+  - compatible
+  - reg
+  - reg-names
+  - interrupts
+  - interrupt-names
+  - clocks
+  - resets
+  - pinctrl-names
+  - pinctrl-0
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include
+    #include
+    #include
+    spi@9C002D80 {
+        compatible = "sunplus,sp7021-spi";
+        reg = <0x9C002D80 0x80>, <0x9C002E00 0x80>;
+        reg-names = "master", "slave";
+        interrupt-parent = <&intc>;
+        interrupt-names = "dma_w",
+                          "master_risc",
+                          "slave_risc";
+        interrupts = <144 IRQ_TYPE_LEVEL_HIGH>,
+                     <146 IRQ_TYPE_LEVEL_HIGH>,
+                     <145 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&clkc SPI_COMBO_0>;
+        resets = <&rstc RST_SPI_COMBO_0>;
+        pinctrl-names = "default";
+        pinctrl-0 = <&pins_spi0>;
+    };
+...
diff --git a/MAINTAINERS b/MAINTAINERS index a517966bc7e88..13301adabe6e8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18495,6 +18495,7 @@ SUNPLUS SPI CONTROLLER INTERFACE DRIVER M: Li-hao Kuo L: linux-spi@vger.kernel.org S: Maintained +F: Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml F: drivers/spi/spi-sunplus-sp7021.c SUPERH -- GitLab From b1bbd3a57b94889cd17147f5594db7f0652275ef Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 17 Jan 2022 16:48:21 -0800 Subject: [PATCH 0029/1586] thermal: fix Documentation bullet list warning Fix bullet list indentation formatting to prevent a docs build warning. Use same indentation style as other files in this directory. Documentation/ABI/testing/sysfs-class-thermal:201: WARNING: Bullet list ends without a blank line; unexpected unindent. Fixes: 80da1b508f29 ("thermal: Move ABI documentation to Documentation/ABI") Signed-off-by: Randy Dunlap Reported-by: Stephen Rothwell Signed-off-by: Rafael J. Wysocki --- Documentation/ABI/testing/sysfs-class-thermal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-class-thermal b/Documentation/ABI/testing/sysfs-class-thermal index 2c52bb1f864ce..8eee37982b2a8 100644 --- a/Documentation/ABI/testing/sysfs-class-thermal +++ b/Documentation/ABI/testing/sysfs-class-thermal @@ -203,7 +203,7 @@ Description: - for generic ACPI: should be "Fan", "Processor" or "LCD" - for memory controller device on intel_menlow platform: - should be "Memory controller". + should be "Memory controller". RO, Required -- GitLab From ae57857b9b6341096ddfd9c0cf26fb640c561160 Mon Sep 17 00:00:00 2001 From: "Rafael J.
Wysocki" Date: Tue, 11 Jan 2022 16:55:25 +0100 Subject: [PATCH 0030/1586] ACPICA: Use uintptr_t and offsetof() in Linux kernel builds To avoid "performing pointer subtraction with a null pointer has undefined behavior" compiler warnings, use uintptr_t and offsetof() that are always available during Linux kernel builds to define acpi_uintptr_t and the ACPI_TO_INTEGER() and ACPI_OFFSET() macros. Based on earlier proposal from Arnd Bergmann. Link: https://lore.kernel.org/linux-acpi/20210927121338.938994-1-arnd@kernel.org Signed-off-by: Rafael J. Wysocki Reviewed-by: Arnd Bergmann --- include/acpi/actypes.h | 4 ++++ include/acpi/platform/aclinux.h | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 69e89d572b9e3..02c1fa16e6388 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -507,8 +507,12 @@ typedef u64 acpi_integer; /* Pointer/Integer type conversions */ #define ACPI_TO_POINTER(i) ACPI_CAST_PTR (void, (acpi_size) (i)) +#ifndef ACPI_TO_INTEGER #define ACPI_TO_INTEGER(p) ACPI_PTR_DIFF (p, (void *) 0) +#endif +#ifndef ACPI_OFFSET #define ACPI_OFFSET(d, f) ACPI_PTR_DIFF (&(((d *) 0)->f), (void *) 0) +#endif #define ACPI_PTR_TO_PHYSADDR(i) ACPI_TO_INTEGER(i) /* Optimizations for 4-character (32-bit) acpi_name manipulation */ diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h index b3ffb9bbf664b..cec41e004ecf7 100644 --- a/include/acpi/platform/aclinux.h +++ b/include/acpi/platform/aclinux.h @@ -114,6 +114,11 @@ #define acpi_raw_spinlock raw_spinlock_t * #define acpi_cpu_flags unsigned long +#define acpi_uintptr_t uintptr_t + +#define ACPI_TO_INTEGER(p) ((uintptr_t)(p)) +#define ACPI_OFFSET(d, f) offsetof(d, f) + /* Use native linux version of acpi_os_allocate_zeroed */ #define USE_NATIVE_ALLOCATE_ZEROED -- GitLab From 2e433a94dab0246fee706d18aaecd67007ead404 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Wed, 12 Jan 2022 19:31:48 +0100 Subject: [PATCH 0031/1586] ACPI: OSL: Fix and clean up acpi_os_read/write_port() First, remove type casts that make acpi_os_read_port() only work on little endian and are generally not needed. Second, avoid clearing the memory pointed to by the value return pointer in acpi_os_read_port() if it is the dummy on the stack (in which case clearing it is not necessary). Finally, prevent both acpi_os_read_port() and acpi_os_write_port() from crashing the kernel when they receive an unsupported width value and make them print a debug message and return an error instead. Signed-off-by: Rafael J. Wysocki --- drivers/acpi/osl.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 45c5c0e45e332..7a70c4bfc23c6 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -642,22 +642,24 @@ u64 acpi_os_get_timer(void) (ACPI_100NSEC_PER_SEC / HZ); } -acpi_status acpi_os_read_port(acpi_io_address port, u32 * value, u32 width) +acpi_status acpi_os_read_port(acpi_io_address port, u32 *value, u32 width) { u32 dummy; - if (!value) + if (value) + *value = 0; + else value = &dummy; - *value = 0; if (width <= 8) { - *(u8 *) value = inb(port); + *value = inb(port); } else if (width <= 16) { - *(u16 *) value = inw(port); + *value = inw(port); } else if (width <= 32) { - *(u32 *) value = inl(port); + *value = inl(port); } else { - BUG(); + pr_debug("%s: Access width %d not supported\n", __func__, width); + return AE_BAD_PARAMETER; } return AE_OK; @@ -674,7 +676,8 @@ acpi_status acpi_os_write_port(acpi_io_address port, u32 value, u32 width) } else if (width <= 32) { outl(value, port); } else { - BUG(); + pr_debug("%s: Access width %d not supported\n", __func__, width); + return AE_BAD_PARAMETER; } return AE_OK; -- GitLab From babc92da5928f81af951663fc436997352e02d3a Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 14 Jan 2022 13:24:49 +0200 Subject: [PATCH 
0032/1586] ACPI: properties: Consistently return -ENOENT if there are no more references __acpi_node_get_property_reference() is documented to return -ENOENT if the caller requests a property reference at an index that does not exist, not -EINVAL which it actually does. Fix this by returning -ENOENT consistently, independently of whether the property value is a plain reference or a package. Fixes: c343bc2ce2c6 ("ACPI: properties: Align return codes of __acpi_node_get_property_reference()") Cc: 4.14+ # 4.14+ Signed-off-by: Sakari Ailus Signed-off-by: Rafael J. Wysocki --- drivers/acpi/property.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index d0986bda29640..3fceb4681ec9f 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -685,7 +685,7 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, */ if (obj->type == ACPI_TYPE_LOCAL_REFERENCE) { if (index) - return -EINVAL; + return -ENOENT; device = acpi_fetch_acpi_dev(obj->reference.handle); if (!device) -- GitLab From 0266c25e7c2821181b610595df42cbca6bc93cb8 Mon Sep 17 00:00:00 2001 From: "GONG, Ruiqi" Date: Tue, 25 Jan 2022 15:11:33 +0800 Subject: [PATCH 0033/1586] selinux: access superblock_security_struct in LSM blob way LSM blob has been involved for superblock's security struct. So fix the remaining direct access to sb->s_security by using the LSM blob mechanism.
Fixes: 08abe46b2cfc ("selinux: fall back to SECURITY_FS_USE_GENFS if no xattr support") Fixes: 69c4a42d72eb ("lsm,selinux: add new hook to compare new mount to an existing mount") Signed-off-by: GONG, Ruiqi Reviewed-by: Casey Schaufler Signed-off-by: Paul Moore --- security/selinux/hooks.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 5b6895e4fc29e..a0243bae84238 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -479,7 +479,7 @@ static int selinux_is_sblabel_mnt(struct super_block *sb) static int sb_check_xattr_support(struct super_block *sb) { - struct superblock_security_struct *sbsec = sb->s_security; + struct superblock_security_struct *sbsec = selinux_superblock(sb); struct dentry *root = sb->s_root; struct inode *root_inode = d_backing_inode(root); u32 sid; @@ -2647,7 +2647,7 @@ free_opt: static int selinux_sb_mnt_opts_compat(struct super_block *sb, void *mnt_opts) { struct selinux_mnt_opts *opts = mnt_opts; - struct superblock_security_struct *sbsec = sb->s_security; + struct superblock_security_struct *sbsec = selinux_superblock(sb); u32 sid; int rc; -- GitLab From bcb62828e3e8c813b6613db6eb7fd9657db248fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20G=C3=B6ttsche?= Date: Tue, 25 Jan 2022 15:14:20 +0100 Subject: [PATCH 0034/1586] selinux: check return value of sel_make_avc_files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sel_make_avc_files() might fail and return a negative errno value on memory allocation failures. Re-add the check of the return value, dropped in 66f8e2f03c02 ("selinux: sidtab reverse lookup hash table"). 
Reported by clang-analyzer: security/selinux/selinuxfs.c:2129:2: warning: Value stored to 'ret' is never read [deadcode.DeadStores] ret = sel_make_avc_files(dentry); ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~ Fixes: 66f8e2f03c02 ("selinux: sidtab reverse lookup hash table") Signed-off-by: Christian Göttsche Reviewed-by: Nick Desaulniers [PM: description line wrapping, added proper commit ref] Signed-off-by: Paul Moore --- security/selinux/selinuxfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index e4cd7cb856f37..f2f6203e0fff5 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -2127,6 +2127,8 @@ static int sel_fill_super(struct super_block *sb, struct fs_context *fc) } ret = sel_make_avc_files(dentry); + if (ret) + goto err; dentry = sel_make_dir(sb->s_root, "ss", &fsi->last_ino); if (IS_ERR(dentry)) { -- GitLab From 08df49054f311ca04954cf24d1216d3b5ddfd0a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20G=C3=B6ttsche?= Date: Tue, 25 Jan 2022 15:14:12 +0100 Subject: [PATCH 0035/1586] selinux: declare path parameters of _genfs_sid const MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The path parameter is only read from in security_genfs_sid(), selinux_policy_genfs_sid() and __security_genfs_sid(). Since a string literal is passed as argument, declare the parameter const. Also align the parameter names in the declaration and definition. Reported by clang [-Wwrite-strings]: security/selinux/hooks.c:553:60: error: passing 'const char [2]' to parameter of type 'char *' discards qualifiers [-Wincompatible-pointer-types-discards-qualifiers] rc = security_genfs_sid(&selinux_state, ... 
, /, ^~~ ./security/selinux/include/security.h:389:36: note: passing argument to parameter 'name' here const char *fstype, char *name, u16 sclass, ^ Signed-off-by: Christian Göttsche [PM: wrapped description] Signed-off-by: Paul Moore --- security/selinux/include/security.h | 4 ++-- security/selinux/ss/services.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index ac0ece01305a6..6482e0efb3689 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -386,11 +386,11 @@ int security_get_allow_unknown(struct selinux_state *state); int security_fs_use(struct selinux_state *state, struct super_block *sb); int security_genfs_sid(struct selinux_state *state, - const char *fstype, char *name, u16 sclass, + const char *fstype, const char *path, u16 sclass, u32 *sid); int selinux_policy_genfs_sid(struct selinux_policy *policy, - const char *fstype, char *name, u16 sclass, + const char *fstype, const char *path, u16 sclass, u32 *sid); #ifdef CONFIG_NETLABEL diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 8e92af7dd284c..5a7df45bdab17 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -2875,7 +2875,7 @@ out_unlock: */ static inline int __security_genfs_sid(struct selinux_policy *policy, const char *fstype, - char *path, + const char *path, u16 orig_sclass, u32 *sid) { @@ -2928,7 +2928,7 @@ static inline int __security_genfs_sid(struct selinux_policy *policy, */ int security_genfs_sid(struct selinux_state *state, const char *fstype, - char *path, + const char *path, u16 orig_sclass, u32 *sid) { @@ -2952,7 +2952,7 @@ int security_genfs_sid(struct selinux_state *state, int selinux_policy_genfs_sid(struct selinux_policy *policy, const char *fstype, - char *path, + const char *path, u16 orig_sclass, u32 *sid) { -- GitLab From 841255544b653cbabe4ee5eda56bbb8b7ad8de8a Mon Sep 17 
00:00:00 2001 From: Peng Fan Date: Tue, 14 Dec 2021 20:07:33 +0800 Subject: [PATCH 0036/1586] clocksource/drivers/imx-sysctr: Drop IRQF_IRQPOLL Per the Documentation, IRQF_IRQPOLL is used for polling (only the interrupt that is registered first in a shared interrupt is considered for performance reasons) But this timer is not sharing interrupt line with others, and actually irqpoll not work with this timer with IRQF_IRQPOLL set, so drop the flag. Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20211214120737.1611955-2-peng.fan@oss.nxp.com Signed-off-by: Daniel Lezcano --- drivers/clocksource/timer-imx-sysctr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/timer-imx-sysctr.c b/drivers/clocksource/timer-imx-sysctr.c index 55a8e198d2a19..523e37662a6e8 100644 --- a/drivers/clocksource/timer-imx-sysctr.c +++ b/drivers/clocksource/timer-imx-sysctr.c @@ -110,7 +110,7 @@ static struct timer_of to_sysctr = { }, .of_irq = { .handler = sysctr_timer_interrupt, - .flags = IRQF_TIMER | IRQF_IRQPOLL, + .flags = IRQF_TIMER, }, .of_clk = { .name = "per", -- GitLab From 59e2bcd8a95b4cc5a04809b6d2ee337e81b47f57 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Tue, 14 Dec 2021 20:07:34 +0800 Subject: [PATCH 0037/1586] clocksource/drivers/imx-tpm: Drop IRQF_IRQPOLL Per the Documentation, IRQF_IRQPOLL is used for polling (only the interrupt that is registered first in a shared interrupt is considered for performance reasons) The TPM timer is not sharing interrupt with others, and pass irqpoll not make sense for i.MX platform. 
Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20211214120737.1611955-3-peng.fan@oss.nxp.com Signed-off-by: Daniel Lezcano --- drivers/clocksource/timer-imx-tpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/timer-imx-tpm.c b/drivers/clocksource/timer-imx-tpm.c index 2cdc077a39f5d..2c0b0d4eba9ef 100644 --- a/drivers/clocksource/timer-imx-tpm.c +++ b/drivers/clocksource/timer-imx-tpm.c @@ -137,7 +137,7 @@ static struct timer_of to_tpm = { }, .of_irq = { .handler = tpm_timer_interrupt, - .flags = IRQF_TIMER | IRQF_IRQPOLL, + .flags = IRQF_TIMER, }, .of_clk = { .name = "per", -- GitLab From 5b3c267506eba2972d53dafb8b988d5fd28d223d Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Tue, 14 Dec 2021 20:07:35 +0800 Subject: [PATCH 0038/1586] clocksource/drivers/imx-tpm: Mark two variable with __ro_after_init counter_width and timer_base will not be updated after init, so mark as __ro_after_init. Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20211214120737.1611955-4-peng.fan@oss.nxp.com Signed-off-by: Daniel Lezcano --- drivers/clocksource/timer-imx-tpm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/timer-imx-tpm.c b/drivers/clocksource/timer-imx-tpm.c index 2c0b0d4eba9ef..3afd9b0a668a6 100644 --- a/drivers/clocksource/timer-imx-tpm.c +++ b/drivers/clocksource/timer-imx-tpm.c @@ -32,8 +32,8 @@ #define TPM_C0SC_CHF_MASK (0x1 << 7) #define TPM_C0V 0x24 -static int counter_width; -static void __iomem *timer_base; +static int counter_width __ro_after_init; +static void __iomem *timer_base __ro_after_init; static inline void tpm_timer_disable(void) { -- GitLab From 39664b624f6a6518001b2c8f86bac1352c89d0af Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Tue, 14 Dec 2021 20:07:36 +0800 Subject: [PATCH 0039/1586] clocksource/drivers/imx-tpm: Add CLOCK_EVT_FEAT_DYNIRQ Add CLOCK_EVT_FEAT_DYNIRQ to allow the IRQ could be runtime set affinity to the cores that needs wake up, otherwise 
saying core0 has to send IPI to wakeup core1. With CLOCK_EVT_FEAT_DYNIRQ set, when broadcast timer could wake up the cores, IPI is not needed. Acked-by: Jacky Bai Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20211214120737.1611955-5-peng.fan@oss.nxp.com Signed-off-by: Daniel Lezcano --- drivers/clocksource/timer-imx-tpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/timer-imx-tpm.c b/drivers/clocksource/timer-imx-tpm.c index 3afd9b0a668a6..578fe162fd993 100644 --- a/drivers/clocksource/timer-imx-tpm.c +++ b/drivers/clocksource/timer-imx-tpm.c @@ -129,7 +129,7 @@ static struct timer_of to_tpm = { .clkevt = { .name = "i.MX7ULP TPM Timer", .rating = 200, - .features = CLOCK_EVT_FEAT_ONESHOT, + .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_DYNIRQ, .set_state_shutdown = tpm_set_state_shutdown, .set_state_oneshot = tpm_set_state_oneshot, .set_next_event = tpm_set_next_event, -- GitLab From e547ffe9e6f497e36bde9d86dbcfbc781946752b Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Tue, 14 Dec 2021 20:07:37 +0800 Subject: [PATCH 0040/1586] clocksource/drivers/imx-tpm: Update name of clkevt The tpm driver is not only for i.MX7ULP now, i.MX8ULP also use it. It maybe also used by other i.MX variants, so update name to reflect it. 
Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20211214120737.1611955-6-peng.fan@oss.nxp.com Signed-off-by: Daniel Lezcano --- drivers/clocksource/timer-imx-tpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/timer-imx-tpm.c b/drivers/clocksource/timer-imx-tpm.c index 578fe162fd993..df8064122b10c 100644 --- a/drivers/clocksource/timer-imx-tpm.c +++ b/drivers/clocksource/timer-imx-tpm.c @@ -127,7 +127,7 @@ static irqreturn_t tpm_timer_interrupt(int irq, void *dev_id) static struct timer_of to_tpm = { .flags = TIMER_OF_IRQ | TIMER_OF_BASE | TIMER_OF_CLOCK, .clkevt = { - .name = "i.MX7ULP TPM Timer", + .name = "i.MX TPM Timer", .rating = 200, .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_DYNIRQ, .set_state_shutdown = tpm_set_state_shutdown, -- GitLab From 474fc2e6395d62758e80b9ea65f61339296355fc Mon Sep 17 00:00:00 2001 From: Guochun Mao Date: Wed, 26 Jan 2022 17:11:59 +0800 Subject: [PATCH 0041/1586] spi: spi-mtk-nor: make some internal variables static Variables mtk_nor_caps_mt8173, mtk_nor_caps_mt8186 and mtk_nor_caps_mt8192 are not declared. Make them static. 
Fixes: 5b177234e9fd ("spi: spi-mtk-nor: improve device table for adding more capabilities") Signed-off-by: Guochun Mao Reported-by: kernel test robot Link: https://lore.kernel.org/r/20220126091159.27513-1-guochun.mao@mediatek.com Signed-off-by: Mark Brown --- drivers/spi/spi-mtk-nor.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-mtk-nor.c b/drivers/spi/spi-mtk-nor.c index 455b4dcb26e9a..94fb09696677f 100644 --- a/drivers/spi/spi-mtk-nor.c +++ b/drivers/spi/spi-mtk-nor.c @@ -770,17 +770,17 @@ static const struct spi_controller_mem_ops mtk_nor_mem_ops = { .exec_op = mtk_nor_exec_op }; -const struct mtk_nor_caps mtk_nor_caps_mt8173 = { +static const struct mtk_nor_caps mtk_nor_caps_mt8173 = { .dma_bits = 32, .extra_dummy_bit = 0, }; -const struct mtk_nor_caps mtk_nor_caps_mt8186 = { +static const struct mtk_nor_caps mtk_nor_caps_mt8186 = { .dma_bits = 32, .extra_dummy_bit = 1, }; -const struct mtk_nor_caps mtk_nor_caps_mt8192 = { +static const struct mtk_nor_caps mtk_nor_caps_mt8192 = { .dma_bits = 36, .extra_dummy_bit = 0, }; -- GitLab From b0596da1a019c092ca5ab64d6999f5e28d52865e Mon Sep 17 00:00:00 2001 From: Eliav Farber Date: Thu, 13 Jan 2022 10:06:20 +0000 Subject: [PATCH 0042/1586] EDAC/mc: Remove unnecessary cast to char * in edac_align_ptr() Remove the forgotten (char *) casts as that function returns void *. [ bp: Rewrite commit message. 
] Signed-off-by: Eliav Farber Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220113100622.12783-3-farbere@amazon.com --- drivers/edac/edac_mc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 9d9aabdec96b7..f92c4c69a033e 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -213,12 +213,12 @@ void *edac_align_ptr(void **p, unsigned int size, int n_elems) else if (size > sizeof(char)) align = sizeof(short); else - return (char *)ptr; + return ptr; r = (unsigned long)p % align; if (r == 0) - return (char *)ptr; + return ptr; *p += align - r; -- GitLab From d3b1161f29cf479b86d4c3c6f200a8eb27254877 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20G=C3=B6ttsche?= Date: Tue, 25 Jan 2022 15:14:13 +0100 Subject: [PATCH 0043/1586] selinux: declare name parameter of hash_eval const MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit String literals are passed as second argument to hash_eval(). Also the parameter is already declared const in the DEBUG_HASHES configuration. 
Reported by clang [-Wwrite-strings]: security/selinux/ss/policydb.c:1881:26: error: passing 'const char [8]' to parameter of type 'char *' discards qualifiers hash_eval(&p->range_tr, rangetr); ^~~~~~~~~ security/selinux/ss/policydb.c:707:55: note: passing argument to parameter 'hash_name' here static inline void hash_eval(struct hashtab *h, char *hash_name) ^ security/selinux/ss/policydb.c:2099:32: error: passing 'const char [11]' to parameter of type 'char *' discards qualifiers hash_eval(&p->filename_trans, filenametr); ^~~~~~~~~~~~ security/selinux/ss/policydb.c:707:55: note: passing argument to parameter 'hash_name' here static inline void hash_eval(struct hashtab *h, char *hash_name) ^ Signed-off-by: Christian Göttsche [PM: line wrapping in description] Signed-off-by: Paul Moore --- security/selinux/ss/policydb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index 0ae1b718194a3..67e03f6e89667 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -704,7 +704,7 @@ static void symtab_hash_eval(struct symtab *s) } #else -static inline void hash_eval(struct hashtab *h, char *hash_name) +static inline void hash_eval(struct hashtab *h, const char *hash_name) { } #endif -- GitLab From 9e2fe574c02bde46307255467a5e4291f65227fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20G=C3=B6ttsche?= Date: Tue, 25 Jan 2022 15:14:14 +0100 Subject: [PATCH 0044/1586] selinux: enclose macro arguments in parenthesis MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enclose the macro arguments in parenthesis to avoid potential evaluation order issues. Note the xperm and ebitmap macros are still not side-effect safe due to double evaluation. 
Reported by clang-tidy [bugprone-macro-parentheses] Signed-off-by: Christian Göttsche Reviewed-by: Nick Desaulniers Signed-off-by: Paul Moore --- security/selinux/include/security.h | 4 ++-- security/selinux/ss/ebitmap.h | 6 +++--- security/selinux/ss/sidtab.c | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index 6482e0efb3689..d91a5672de991 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -254,8 +254,8 @@ struct av_decision { #define XPERMS_AUDITALLOW 2 #define XPERMS_DONTAUDIT 4 -#define security_xperm_set(perms, x) (perms[x >> 5] |= 1 << (x & 0x1f)) -#define security_xperm_test(perms, x) (1 & (perms[x >> 5] >> (x & 0x1f))) +#define security_xperm_set(perms, x) ((perms)[(x) >> 5] |= 1 << ((x) & 0x1f)) +#define security_xperm_test(perms, x) (1 & ((perms)[(x) >> 5] >> ((x) & 0x1f))) struct extended_perms_data { u32 p[8]; }; diff --git a/security/selinux/ss/ebitmap.h b/security/selinux/ss/ebitmap.h index 9eb2d0af2805e..58eb822f11eef 100644 --- a/security/selinux/ss/ebitmap.h +++ b/security/selinux/ss/ebitmap.h @@ -118,9 +118,9 @@ static inline void ebitmap_node_clr_bit(struct ebitmap_node *n, } #define ebitmap_for_each_positive_bit(e, n, bit) \ - for (bit = ebitmap_start_positive(e, &n); \ - bit < ebitmap_length(e); \ - bit = ebitmap_next_positive(e, &n, bit)) \ + for ((bit) = ebitmap_start_positive(e, &(n)); \ + (bit) < ebitmap_length(e); \ + (bit) = ebitmap_next_positive(e, &(n), bit)) \ int ebitmap_cmp(struct ebitmap *e1, struct ebitmap *e2); int ebitmap_cpy(struct ebitmap *dst, struct ebitmap *src); diff --git a/security/selinux/ss/sidtab.c b/security/selinux/ss/sidtab.c index 293ec048af08c..a54b8652bfb50 100644 --- a/security/selinux/ss/sidtab.c +++ b/security/selinux/ss/sidtab.c @@ -27,8 +27,8 @@ struct sidtab_str_cache { char str[]; }; -#define index_to_sid(index) (index + SECINITSID_NUM + 1) -#define sid_to_index(sid) 
(sid - (SECINITSID_NUM + 1)) +#define index_to_sid(index) ((index) + SECINITSID_NUM + 1) +#define sid_to_index(sid) ((sid) - (SECINITSID_NUM + 1)) int sidtab_init(struct sidtab *s) { -- GitLab From 0b3c2b3dc96a57fd64691d666c4c7123a4f4e7b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20G=C3=B6ttsche?= Date: Tue, 25 Jan 2022 15:14:15 +0100 Subject: [PATCH 0045/1586] selinux: drop cast to same type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both the lvalue scontextp and rvalue scontext are of the type char*. Drop the redundant explicit cast not needed since commit 9a59daa03df7 ("SELinux: fix sleeping allocation in security_context_to_sid"), where the type of scontext changed from const char* to char*. Signed-off-by: Christian Göttsche Signed-off-by: Paul Moore --- security/selinux/ss/services.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 5a7df45bdab17..2f8db93e53b29 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -1452,7 +1452,7 @@ static int string_to_context_struct(struct policydb *pol, /* Parse the security context. */ rc = -EINVAL; - scontextp = (char *) scontext; + scontextp = scontext; /* Extract the user. */ p = scontextp; -- GitLab From 056945a96cf58060560498e069a10d94a1ef802b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20G=C3=B6ttsche?= Date: Tue, 25 Jan 2022 15:14:16 +0100 Subject: [PATCH 0046/1586] selinux: drop unused parameter of avtab_insert_node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The parameter cur is not used in avtab_insert_node(). 
Reported by clang [-Wunused-parameter] Signed-off-by: Christian Göttsche Signed-off-by: Paul Moore --- security/selinux/ss/avtab.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/security/selinux/ss/avtab.c b/security/selinux/ss/avtab.c index c97695ae508f1..cfdae20792e11 100644 --- a/security/selinux/ss/avtab.c +++ b/security/selinux/ss/avtab.c @@ -67,7 +67,7 @@ static inline int avtab_hash(const struct avtab_key *keyp, u32 mask) static struct avtab_node* avtab_insert_node(struct avtab *h, int hvalue, - struct avtab_node *prev, struct avtab_node *cur, + struct avtab_node *prev, const struct avtab_key *key, const struct avtab_datum *datum) { struct avtab_node *newnode; @@ -137,7 +137,7 @@ static int avtab_insert(struct avtab *h, const struct avtab_key *key, break; } - newnode = avtab_insert_node(h, hvalue, prev, cur, key, datum); + newnode = avtab_insert_node(h, hvalue, prev, key, datum); if (!newnode) return -ENOMEM; @@ -177,7 +177,7 @@ struct avtab_node *avtab_insert_nonunique(struct avtab *h, key->target_class < cur->key.target_class) break; } - return avtab_insert_node(h, hvalue, prev, cur, key, datum); + return avtab_insert_node(h, hvalue, prev, key, datum); } struct avtab_datum *avtab_search(struct avtab *h, const struct avtab_key *key) -- GitLab From 73073d956a2073554b99d621a7a7ec9437055044 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20G=C3=B6ttsche?= Date: Tue, 25 Jan 2022 15:14:17 +0100 Subject: [PATCH 0047/1586] selinux: do not discard const qualifier in cast MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Do not discard the const qualifier on the cast from const void* to __be32*; the addressed value is not modified. 
Reported by clang [-Wcast-qual] Signed-off-by: Christian Göttsche Signed-off-by: Paul Moore --- security/selinux/netnode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/security/selinux/netnode.c b/security/selinux/netnode.c index 4a7d2ab5b9609..889552db0d31a 100644 --- a/security/selinux/netnode.c +++ b/security/selinux/netnode.c @@ -107,7 +107,7 @@ static struct sel_netnode *sel_netnode_find(const void *addr, u16 family) switch (family) { case PF_INET: - idx = sel_netnode_hashfn_ipv4(*(__be32 *)addr); + idx = sel_netnode_hashfn_ipv4(*(const __be32 *)addr); break; case PF_INET6: idx = sel_netnode_hashfn_ipv6(addr); @@ -121,7 +121,7 @@ static struct sel_netnode *sel_netnode_find(const void *addr, u16 family) if (node->nsec.family == family) switch (family) { case PF_INET: - if (node->nsec.addr.ipv4 == *(__be32 *)addr) + if (node->nsec.addr.ipv4 == *(const __be32 *)addr) return node; break; case PF_INET6: -- GitLab From b084e189b01a7614d3098aca4f2381a759460d88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20G=C3=B6ttsche?= Date: Tue, 25 Jan 2022 15:14:18 +0100 Subject: [PATCH 0048/1586] selinux: simplify cred_init_security MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The parameter of selinux_cred() is declared const, so an explicit cast dropping the const qualifier is not necessary. Without the cast the local variable cred serves no purpose. 
Reported by clang [-Wcast-qual] Signed-off-by: Christian Göttsche Signed-off-by: Paul Moore --- security/selinux/hooks.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index a0243bae84238..eae7dbd62df16 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -211,10 +211,9 @@ static int selinux_lsm_notifier_avc_callback(u32 event) */ static void cred_init_security(void) { - struct cred *cred = (struct cred *) current->real_cred; struct task_security_struct *tsec; - tsec = selinux_cred(cred); + tsec = selinux_cred(current->real_cred); tsec->osid = tsec->sid = SECINITSID_KERNEL; } -- GitLab From b5e68162f859132af419af479bdb96e2ae18fa2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20G=C3=B6ttsche?= Date: Tue, 25 Jan 2022 15:14:19 +0100 Subject: [PATCH 0049/1586] selinux: drop unused macro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The macro _DEBUG_HASHES is nowhere used. The configuration DEBUG_HASHES enables debugging of the SELinux hash tables, but the underscore-prefixed macro definition has no direct impact or any documentation. 
Reported by clang [-Wunused-macros] Signed-off-by: Christian Göttsche Reviewed-by: Nick Desaulniers Signed-off-by: Paul Moore --- security/selinux/ss/policydb.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index 67e03f6e89667..d036e1238e771 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -41,8 +41,6 @@ #include "mls.h" #include "services.h" -#define _DEBUG_HASHES - #ifdef DEBUG_HASHES static const char *symtab_name[SYM_NUM] = { "common prefixes", -- GitLab From cdeea45422f579b9302e377d1ede29133d3fde8e Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 27 Jan 2022 10:45:59 -0500 Subject: [PATCH 0050/1586] selinux: fix a type cast problem in cred_init_security() In the process of removing an explicit type cast to preserve a cred const qualifier in cred_init_security() we ran into a problem where the task_struct::real_cred field is defined with the "__rcu" attribute but the selinux_cred() function parameter is not, leading to a sparse warning: security/selinux/hooks.c:216:36: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected struct cred const *cred @@ got struct cred const [noderef] __rcu *real_cred As we don't want to add the "__rcu" attribute to the selinux_cred() parameter, we're going to add an explicit cast back to cred_init_security(). 
Fixes: b084e189b01a ("selinux: simplify cred_init_security") Reported-by: kernel test robot Signed-off-by: Paul Moore --- security/selinux/hooks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index eae7dbd62df16..221e642025f53 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -213,7 +213,7 @@ static void cred_init_security(void) { struct task_security_struct *tsec; - tsec = selinux_cred(current->real_cred); + tsec = selinux_cred(unrcu_pointer(current->real_cred)); tsec->osid = tsec->sid = SECINITSID_KERNEL; } -- GitLab From c75c6a8add370cde88117901f21cf0d6eaf09a8f Mon Sep 17 00:00:00 2001 From: Tang Yizhou Date: Wed, 29 Dec 2021 10:42:10 +0800 Subject: [PATCH 0051/1586] docs/zh_CN: Cleanup index.rst Many */index in the Chinese index.rst are not in the same order as the English version. Put them to where they should be. Signed-off-by: Tang Yizhou Reviewed-by: Yanteng Si Link: https://lore.kernel.org/r/20211229024212.32752-2-tangyizhou@huawei.com Signed-off-by: Jonathan Corbet --- Documentation/translations/zh_CN/index.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/translations/zh_CN/index.rst b/Documentation/translations/zh_CN/index.rst index 46e14ec9963da..e7f5fa3e75027 100644 --- a/Documentation/translations/zh_CN/index.rst +++ b/Documentation/translations/zh_CN/index.rst @@ -104,13 +104,13 @@ TODOList: :maxdepth: 2 core-api/index + accounting/index cpu-freq/index iio/index + infiniband/index + virt/index sound/index filesystems/index - virt/index - infiniband/index - accounting/index scheduler/index TODOList: -- GitLab From 30e61d38f01d77d6d1f76c7566e70d46eff3724d Mon Sep 17 00:00:00 2001 From: Tang Yizhou Date: Wed, 29 Dec 2021 10:42:11 +0800 Subject: [PATCH 0052/1586] docs/zh_CN: Add power/index Chinese translation Translate power/index.rst into Chinese. 
Signed-off-by: Tang Yizhou Reviewed-by: Alex Shi Reviewed-by: Yanteng Si Link: https://lore.kernel.org/r/20211229024212.32752-3-tangyizhou@huawei.com Signed-off-by: Jonathan Corbet --- Documentation/translations/zh_CN/index.rst | 2 +- .../translations/zh_CN/power/index.rst | 55 +++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 Documentation/translations/zh_CN/power/index.rst diff --git a/Documentation/translations/zh_CN/index.rst b/Documentation/translations/zh_CN/index.rst index e7f5fa3e75027..08144252630c3 100644 --- a/Documentation/translations/zh_CN/index.rst +++ b/Documentation/translations/zh_CN/index.rst @@ -108,6 +108,7 @@ TODOList: cpu-freq/index iio/index infiniband/index + power/index virt/index sound/index filesystems/index @@ -129,7 +130,6 @@ TODOList: * netlabel/index * networking/index * pcmcia/index -* power/index * target/index * timers/index * spi/index diff --git a/Documentation/translations/zh_CN/power/index.rst b/Documentation/translations/zh_CN/power/index.rst new file mode 100644 index 0000000000000..fd379adfda8e6 --- /dev/null +++ b/Documentation/translations/zh_CN/power/index.rst @@ -0,0 +1,55 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/power/index.rst + +:翻译: + + 唐艺舟 Tang Yizhou + +======== +电源管理 +======== + +.. toctree:: + :maxdepth: 1 + +TODOList: + + * apm-acpi + * basic-pm-debugging + * charger-manager + * drivers-testing + * energy-model + * freezing-of-tasks + * opp + * pci + * pm_qos_interface + * power_supply_class + * runtime_pm + * s2ram + * suspend-and-cpuhotplug + * suspend-and-interrupts + * swsusp-and-swap-files + * swsusp-dmcrypt + * swsusp + * video + * tricks + + * userland-swsusp + + * powercap/powercap + * powercap/dtpm + + * regulator/consumer + * regulator/design + * regulator/machine + * regulator/overview + * regulator/regulator + +.. 
only:: subproject and html + + Indices + ======= + + * :ref:`genindex` -- GitLab From dd774a07ddfcb4dfe15778ea30cd5bb592ffab29 Mon Sep 17 00:00:00 2001 From: Tang Yizhou Date: Wed, 29 Dec 2021 10:42:12 +0800 Subject: [PATCH 0053/1586] docs/zh_CN: Add opp Chinese translation Translate power/opp.rst into Chinese. Signed-off-by: Tang Yizhou Reviewed-by: Alex Shi Reviewed-by: Yanteng Si Link: https://lore.kernel.org/r/20211229024212.32752-4-tangyizhou@huawei.com Signed-off-by: Jonathan Corbet --- .../translations/zh_CN/power/index.rst | 3 +- .../translations/zh_CN/power/opp.rst | 341 ++++++++++++++++++ 2 files changed, 343 insertions(+), 1 deletion(-) create mode 100644 Documentation/translations/zh_CN/power/opp.rst diff --git a/Documentation/translations/zh_CN/power/index.rst b/Documentation/translations/zh_CN/power/index.rst index fd379adfda8e6..ad80a9e80b7cd 100644 --- a/Documentation/translations/zh_CN/power/index.rst +++ b/Documentation/translations/zh_CN/power/index.rst @@ -14,6 +14,8 @@ .. toctree:: :maxdepth: 1 + opp + TODOList: * apm-acpi @@ -22,7 +24,6 @@ TODOList: * drivers-testing * energy-model * freezing-of-tasks - * opp * pci * pm_qos_interface * power_supply_class diff --git a/Documentation/translations/zh_CN/power/opp.rst b/Documentation/translations/zh_CN/power/opp.rst new file mode 100644 index 0000000000000..8d6e3f6f62024 --- /dev/null +++ b/Documentation/translations/zh_CN/power/opp.rst @@ -0,0 +1,341 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/power/opp.rst + +:翻译: + + 唐艺舟 Tang Yizhou + +====================== +操作性能值(OPP)库 +====================== + +(C) 2009-2010 Nishanth Menon , 德州仪器公司 + +.. 目录 + + 1. 简介 + 2. OPP链表初始注册 + 3. OPP搜索函数 + 4. OPP可用性控制函数 + 5. OPP数据检索函数 + 6. 数据结构 + +1. 简介 +======= + +1.1 何为操作性能值(OPP)? 
+------------------------------ + +当今复杂的单片系统(SoC)由多个子模块组成,这些子模块会联合工作。在一个执行不同用例 +的操作系统中,并不是SoC中的所有模块都需要一直以最高频率工作。为了促成这一点,SoC中 +的子模块被分组为不同域,允许一些域以较低的电压和频率运行,而其它域则以较高的“电压/ +频率对”运行。 + +设备按域支持的由频率电压对组成的离散的元组的集合,被称为操作性能值(组),或OPPs。 + +举例来说: + +让我们考虑一个支持下述频率、电压值的内存保护单元(MPU)设备: +{300MHz,最低电压为1V}, {800MHz,最低电压为1.2V}, {1GHz,最低电压为1.3V} + +我们能将它们表示为3个OPP,如下述{Hz, uV}元组(译注:频率的单位是赫兹,电压的单位是 +微伏)。 + +- {300000000, 1000000} +- {800000000, 1200000} +- {1000000000, 1300000} + +1.2 操作性能值库 +---------------- + +OPP库提供了一组辅助函数来组织和查询OPP信息。该库位于drivers/opp/目录下,其头文件 +位于include/linux/pm_opp.h中。OPP库可以通过开启CONFIG_PM_OPP来启用。某些SoC, +如德州仪器的OMAP框架允许在不需要cpufreq的情况下可选地在某一OPP下启动。 + +OPP库的典型用法如下:: + + (用户) -> 注册一个默认的OPP集合 -> (库) + (SoC框架) -> 在必要的情况下,对某些OPP进行修改 -> OPP layer + -> 搜索/检索信息的查询 -> + +OPP层期望每个域由一个唯一的设备指针来表示。SoC框架在OPP层为每个设备注册了一组初始 +OPP。这个链表的长度被期望是一个最优化的小数字,通常每个设备大约5个。初始链表包含了 +一个OPP集合,这个集合被期望能在系统中安全使能。 + +关于OPP可用性的说明 +^^^^^^^^^^^^^^^^^^^ + +随着系统的运行,SoC框架可能会基于各种外部因素选择让某些OPP在每个设备上可用或不可用, +示例:温度管理或其它异常场景中,SoC框架可能会选择禁用一个较高频率的OPP以安全地继续 +运行,直到该OPP被重新启用(如果可能)。 + +OPP库在它的实现中达成了这个概念。以下操作函数只能对可用的OPP使用: +dev_pm_opp_find_freq_{ceil, floor}, dev_pm_opp_get_voltage, +dev_pm_opp_get_freq, dev_pm_opp_get_opp_count。 + +dev_pm_opp_find_freq_exact是用来查找OPP指针的,该指针可被用在dev_pm_opp_enable/ +disable函数,使一个OPP在被需要时变为可用。 + +警告:如果对一个设备调用dev_pm_opp_enable/disable函数,OPP库的用户应该使用 +dev_pm_opp_get_opp_count来刷新OPP的可用性计数。触发这些的具体机制,或者对有依赖的 +子系统(比如cpufreq)的通知机制,都是由使用OPP库的SoC特定框架酌情处理的。在这些操作 +中,同样需要注意刷新cpufreq表。 + +2. 
OPP链表初始注册 +================== +SoC的实现会迭代调用dev_pm_opp_add函数来增加每个设备的OPP。预期SoC框架将以最优的 +方式注册OPP条目 - 典型的数字范围小于5。通过注册OPP生成的OPP链表,在整个设备运行过程 +中由OPP库维护。SoC框架随后可以使用dev_pm_opp_enable / disable函数动态地 +控制OPP的可用性。 + +dev_pm_opp_add + 为设备指针所指向的特定域添加一个新的OPP。OPP是用频率和电压定义的。一旦完成 + 添加,OPP被认为是可用的,可以用dev_pm_opp_enable/disable函数来控制其可用性。 + OPP库内部用dev_pm_opp结构体存储并管理这些信息。这个函数可以被SoC框架根据SoC + 的使用环境的需求来定义一个最优链表。 + + 警告: + 不要在中断上下文使用这个函数。 + + 示例:: + + soc_pm_init() + { + /* 做一些事情 */ + r = dev_pm_opp_add(mpu_dev, 1000000, 900000); + if (!r) { + pr_err("%s: unable to register mpu opp(%d)\n", r); + goto no_cpufreq; + } + /* 做一些和cpufreq相关的事情 */ + no_cpufreq: + /* 做剩余的事情 */ + } + +3. OPP搜索函数 +============== +cpufreq等高层框架对频率进行操作,为了将频率映射到相应的OPP,OPP库提供了便利的函数 +来搜索OPP库内部管理的OPP链表。这些搜索函数如果找到匹配的OPP,将返回指向该OPP的指针, +否则返回错误。这些错误预计由标准的错误检查,如IS_ERR()来处理,并由调用者采取适当的 +行动。 + +这些函数的调用者应在使用完OPP后调用dev_pm_opp_put()。否则,OPP的内存将永远不会 +被释放,并导致内存泄露。 + +dev_pm_opp_find_freq_exact + 根据 *精确的* 频率和可用性来搜索OPP。这个函数对默认不可用的OPP特别有用。 + 例子:在SoC框架检测到更高频率可用的情况下,它可以使用这个函数在调用 + dev_pm_opp_enable之前找到OPP:: + + opp = dev_pm_opp_find_freq_exact(dev, 1000000000, false); + dev_pm_opp_put(opp); + /* 不要操作指针.. 只是做有效性检查.. */ + if (IS_ERR(opp)) { + pr_err("frequency not disabled!\n"); + /* 触发合适的操作.. */ + } else { + dev_pm_opp_enable(dev,1000000000); + } + + 注意: + 这是唯一一个可以搜索不可用OPP的函数。 + +dev_pm_opp_find_freq_floor + 搜索一个 *最多* 提供指定频率的可用OPP。这个函数在搜索较小的匹配或按频率 + 递减的顺序操作OPP信息时很有用。 + 例子:要找的一个设备的最高OPP:: + + freq = ULONG_MAX; + opp = dev_pm_opp_find_freq_floor(dev, &freq); + dev_pm_opp_put(opp); + +dev_pm_opp_find_freq_ceil + 搜索一个 *最少* 提供指定频率的可用OPP。这个函数在搜索较大的匹配或按频率 + 递增的顺序操作OPP信息时很有用。 + 例1:找到一个设备最小的OPP:: + + freq = 0; + opp = dev_pm_opp_find_freq_ceil(dev, &freq); + dev_pm_opp_put(opp); + + 例: 一个SoC的cpufreq_driver->target的简易实现:: + + soc_cpufreq_target(..) 
+ { + /* 做策略检查等操作 */ + /* 找到和请求最接近的频率 */ + opp = dev_pm_opp_find_freq_ceil(dev, &freq); + dev_pm_opp_put(opp); + if (!IS_ERR(opp)) + soc_switch_to_freq_voltage(freq); + else + /* 当不能满足请求时,要做的事 */ + /* 做其它事 */ + } + +4. OPP可用性控制函数 +==================== +在OPP库中注册的默认OPP链表也许无法满足所有可能的场景。OPP库提供了一套函数来修改 +OPP链表中的某个OPP的可用性。这使得SoC框架能够精细地动态控制哪一组OPP是可用于操作 +的。设计这些函数的目的是在诸如考虑温度时 *暂时地* 删除某个OPP(例如,在温度下降 +之前不要使用某OPP)。 + +警告: + 不要在中断上下文使用这些函数。 + +dev_pm_opp_enable + 使一个OPP可用于操作。 + 例子:假设1GHz的OPP只有在SoC温度低于某个阈值时才可用。SoC框架的实现可能 + 会选择做以下事情:: + + if (cur_temp < temp_low_thresh) { + /* 若1GHz未使能,则使能 */ + opp = dev_pm_opp_find_freq_exact(dev, 1000000000, false); + dev_pm_opp_put(opp); + /* 仅仅是错误检查 */ + if (!IS_ERR(opp)) + ret = dev_pm_opp_enable(dev, 1000000000); + else + goto try_something_else; + } + +dev_pm_opp_disable + 使一个OPP不可用于操作。 + 例子:假设1GHz的OPP只有在SoC温度高于某个阈值时才可用。SoC框架的实现可能 + 会选择做以下事情:: + + if (cur_temp > temp_high_thresh) { + /* 若1GHz已使能,则关闭 */ + opp = dev_pm_opp_find_freq_exact(dev, 1000000000, true); + dev_pm_opp_put(opp); + /* 仅仅是错误检查 */ + if (!IS_ERR(opp)) + ret = dev_pm_opp_disable(dev, 1000000000); + else + goto try_something_else; + } + +5. OPP数据检索函数 +================== +由于OPP库对OPP信息进行了抽象化处理,因此需要一组函数来从dev_pm_opp结构体中提取 +信息。一旦使用搜索函数检索到一个OPP指针,以下函数就可以被SoC框架用来检索OPP层 +内部描述的信息。 + +dev_pm_opp_get_voltage + 检索OPP指针描述的电压。 + 例子: 当cpufreq切换到不同频率时,SoC框架需要用稳压器框架将OPP描述 + 的电压设置到提供电压的电源管理芯片中:: + + soc_switch_to_freq_voltage(freq) + { + /* 做一些事情 */ + opp = dev_pm_opp_find_freq_ceil(dev, &freq); + v = dev_pm_opp_get_voltage(opp); + dev_pm_opp_put(opp); + if (v) + regulator_set_voltage(.., v); + /* 做其它事 */ + } + +dev_pm_opp_get_freq + 检索OPP指针描述的频率。 + 例子:比方说,SoC框架使用了几个辅助函数,通过这些函数,我们可以将OPP + 指针传入,而不是传入额外的参数,用来处理一系列数据参数:: + + soc_cpufreq_target(..) + { + /* 做一些事情.. 
*/ + max_freq = ULONG_MAX; + max_opp = dev_pm_opp_find_freq_floor(dev,&max_freq); + requested_opp = dev_pm_opp_find_freq_ceil(dev,&freq); + if (!IS_ERR(max_opp) && !IS_ERR(requested_opp)) + r = soc_test_validity(max_opp, requested_opp); + dev_pm_opp_put(max_opp); + dev_pm_opp_put(requested_opp); + /* 做其它事 */ + } + soc_test_validity(..) + { + if(dev_pm_opp_get_voltage(max_opp) < dev_pm_opp_get_voltage(requested_opp)) + return -EINVAL; + if(dev_pm_opp_get_freq(max_opp) < dev_pm_opp_get_freq(requested_opp)) + return -EINVAL; + /* 做一些事情.. */ + } + +dev_pm_opp_get_opp_count + 检索某个设备可用的OPP数量。 + 例子:假设SoC中的一个协处理器需要知道某个表中的可用频率,主处理器可以 + 按如下方式发出通知:: + + soc_notify_coproc_available_frequencies() + { + /* 做一些事情 */ + num_available = dev_pm_opp_get_opp_count(dev); + speeds = kzalloc(sizeof(u32) * num_available, GFP_KERNEL); + /* 按升序填充表 */ + freq = 0; + while (!IS_ERR(opp = dev_pm_opp_find_freq_ceil(dev, &freq))) { + speeds[i] = freq; + freq++; + i++; + dev_pm_opp_put(opp); + } + + soc_notify_coproc(AVAILABLE_FREQs, speeds, num_available); + /* 做其它事 */ + } + +6. 数据结构 +=========== +通常,一个SoC包含多个可变电压域。每个域由一个设备指针描述。和OPP之间的关系可以 +按以下方式描述:: + + SoC + |- device 1 + | |- opp 1 (availability, freq, voltage) + | |- opp 2 .. + ... ... + | `- opp n .. + |- device 2 + ... 
+ `- device m + +OPP库维护着一个内部链表,SoC框架使用上文描述的各个函数来填充和访问。然而,描述 +真实OPP和域的结构体是OPP库自身的内部组成,以允许合适的抽象在不同系统中得到复用。 + +struct dev_pm_opp + OPP库的内部数据结构,用于表示一个OPP。除了频率、电压、可用性信息外, + 它还包含OPP库运行所需的内部统计信息。指向这个结构体的指针被提供给 + 用户(比如SoC框架)使用,在与OPP层的交互中作为OPP的标识符。 + + 警告: + 结构体dev_pm_opp的指针不应该由用户解析或修改。一个实例的默认值由 + dev_pm_opp_add填充,但OPP的可用性由dev_pm_opp_enable/disable函数 + 修改。 + +struct device + 这用于向OPP层标识一个域。设备的性质和它的实现是由OPP库的用户决定的, + 如SoC框架。 + +总体来说,以一个简化的视角看,对数据结构的操作可以描述为下面各图:: + + 初始化 / 修改: + +-----+ /- dev_pm_opp_enable + dev_pm_opp_add --> | opp | <------- + | +-----+ \- dev_pm_opp_disable + \-------> domain_info(device) + + 搜索函数: + /-- dev_pm_opp_find_freq_ceil ---\ +-----+ + domain_info<---- dev_pm_opp_find_freq_exact -----> | opp | + \-- dev_pm_opp_find_freq_floor ---/ +-----+ + + 检索函数: + +-----+ /- dev_pm_opp_get_voltage + | opp | <--- + +-----+ \- dev_pm_opp_get_freq + + domain_info <- dev_pm_opp_get_opp_count -- GitLab From bf026e2e316ba57135b70e8ce591276239c7b2cf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 10 Jan 2022 11:50:43 +0100 Subject: [PATCH 0054/1586] Documentation: Fill the gaps about entry/noinstr constraints The entry/exit handling for exceptions, interrupts, syscalls and KVM is not really documented except for some comments. Fill the gaps. Signed-off-by: Thomas Gleixner Signed-off-by: Nicolas Saenz Julienne Reviewed-by: Mark Rutland Reviewed-by: Paul E. McKenney ---- Changes since v3: - s/nointr/noinstr/ Changes since v2: - No big content changes, just style corrections, so it should be pretty clean at this stage. In the light of this, I kept Mark's Reviewed-by. 
- Paul's style and paragraph re-writes - Randy's style comments - Add links to transition type sections Documentation/core-api/entry.rst | 261 +++++++++++++++++++++++++++++++ Documentation/core-api/index.rst | 8 + 2 files changed, 269 insertions(+) create mode 100644 Documentation/core-api/entry.rst Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20220110105044.94423-1-nsaenzju@redhat.com Signed-off-by: Jonathan Corbet --- Documentation/core-api/entry.rst | 261 +++++++++++++++++++++++++++++++ Documentation/core-api/index.rst | 8 + 2 files changed, 269 insertions(+) create mode 100644 Documentation/core-api/entry.rst diff --git a/Documentation/core-api/entry.rst b/Documentation/core-api/entry.rst new file mode 100644 index 0000000000000..c6f8e22c88fe8 --- /dev/null +++ b/Documentation/core-api/entry.rst @@ -0,0 +1,261 @@ +Entry/exit handling for exceptions, interrupts, syscalls and KVM +================================================================ + +All transitions between execution domains require state updates which are +subject to strict ordering constraints. State updates are required for the +following: + + * Lockdep + * RCU / Context tracking + * Preemption counter + * Tracing + * Time accounting + +The update order depends on the transition type and is explained below in +the transition type sections: `Syscalls`_, `KVM`_, `Interrupts and regular +exceptions`_, `NMI and NMI-like exceptions`_. + +Non-instrumentable code - noinstr +--------------------------------- + +Most instrumentation facilities depend on RCU, so instrumentation is prohibited +for entry code before RCU starts watching and exit code after RCU stops +watching. In addition, many architectures must save and restore register state, +which means that (for example) a breakpoint in the breakpoint entry code would +overwrite the debug registers of the initial breakpoint. 
+ +Such code must be marked with the 'noinstr' attribute, placing that code into a +special section inaccessible to instrumentation and debug facilities. Some +functions are partially instrumentable, which is handled by marking them +noinstr and using instrumentation_begin() and instrumentation_end() to flag the +instrumentable ranges of code: + +.. code-block:: c + + noinstr void entry(void) + { + handle_entry(); // <-- must be 'noinstr' or '__always_inline' + ... + + instrumentation_begin(); + handle_context(); // <-- instrumentable code + instrumentation_end(); + + ... + handle_exit(); // <-- must be 'noinstr' or '__always_inline' + } + +This allows verification of the 'noinstr' restrictions via objtool on +supported architectures. + +Invoking non-instrumentable functions from instrumentable context has no +restrictions and is useful to protect e.g. state switching which would +cause malfunction if instrumented. + +All non-instrumentable entry/exit code sections before and after the RCU +state transitions must run with interrupts disabled. + +Syscalls +-------- + +Syscall-entry code starts in assembly code and calls out into low-level C code +after establishing low-level architecture-specific state and stack frames. This +low-level C code must not be instrumented. A typical syscall handling function +invoked from low-level assembly code looks like this: + +.. code-block:: c + + noinstr void syscall(struct pt_regs *regs, int nr) + { + arch_syscall_enter(regs); + nr = syscall_enter_from_user_mode(regs, nr); + + instrumentation_begin(); + if (!invoke_syscall(regs, nr) && nr != -1) + result_reg(regs) = __sys_ni_syscall(regs); + instrumentation_end(); + + syscall_exit_to_user_mode(regs); + } + +syscall_enter_from_user_mode() first invokes enter_from_user_mode() which +establishes state in the following order: + + * Lockdep + * RCU / Context tracking + * Tracing + +and then invokes the various entry work functions like ptrace, seccomp, audit, +syscall tracing, etc. 
After all that is done, the instrumentable invoke_syscall +function can be invoked. The instrumentable code section then ends, after which +syscall_exit_to_user_mode() is invoked. + +syscall_exit_to_user_mode() handles all work which needs to be done before +returning to user space like tracing, audit, signals, task work etc. After +that it invokes exit_to_user_mode() which again handles the state +transition in the reverse order: + + * Tracing + * RCU / Context tracking + * Lockdep + +syscall_enter_from_user_mode() and syscall_exit_to_user_mode() are also +available as fine grained subfunctions in cases where the architecture code +has to do extra work between the various steps. In such cases it has to +ensure that enter_from_user_mode() is called first on entry and +exit_to_user_mode() is called last on exit. + + +KVM +--- + +Entering or exiting guest mode is very similar to syscalls. From the host +kernel point of view the CPU goes off into user space when entering the +guest and returns to the kernel on exit. + +kvm_guest_enter_irqoff() is a KVM-specific variant of exit_to_user_mode() +and kvm_guest_exit_irqoff() is the KVM variant of enter_from_user_mode(). +The state operations have the same ordering. + +Task work handling is done separately for guest at the boundary of the +vcpu_run() loop via xfer_to_guest_mode_handle_work() which is a subset of +the work handled on return to user space. + +Interrupts and regular exceptions +--------------------------------- + +Interrupts entry and exit handling is slightly more complex than syscalls +and KVM transitions. + +If an interrupt is raised while the CPU executes in user space, the entry +and exit handling is exactly the same as for syscalls. + +If the interrupt is raised while the CPU executes in kernel space the entry and +exit handling is slightly different. RCU state is only updated when the +interrupt is raised in the context of the CPU's idle task. Otherwise, RCU will +already be watching. 
Lockdep and tracing have to be updated unconditionally. + +irqentry_enter() and irqentry_exit() provide the implementation for this. + +The architecture-specific part looks similar to syscall handling: + +.. code-block:: c + + noinstr void interrupt(struct pt_regs *regs, int nr) + { + arch_interrupt_enter(regs); + state = irqentry_enter(regs); + + instrumentation_begin(); + + irq_enter_rcu(); + invoke_irq_handler(regs, nr); + irq_exit_rcu(); + + instrumentation_end(); + + irqentry_exit(regs, state); + } + +Note that the invocation of the actual interrupt handler is within a +irq_enter_rcu() and irq_exit_rcu() pair. + +irq_enter_rcu() updates the preemption count which makes in_hardirq() +return true, handles NOHZ tick state and interrupt time accounting. This +means that up to the point where irq_enter_rcu() is invoked in_hardirq() +returns false. + +irq_exit_rcu() handles interrupt time accounting, undoes the preemption +count update and eventually handles soft interrupts and NOHZ tick state. + +In theory, the preemption count could be updated in irqentry_enter(). In +practice, deferring this update to irq_enter_rcu() allows the preemption-count +code to be traced, while also maintaining symmetry with irq_exit_rcu() and +irqentry_exit(), which are described in the next paragraph. The only downside +is that the early entry code up to irq_enter_rcu() must be aware that the +preemption count has not yet been updated with the HARDIRQ_OFFSET state. + +Note that irq_exit_rcu() must remove HARDIRQ_OFFSET from the preemption count +before it handles soft interrupts, whose handlers must run in BH context rather +than irq-disabled context. In addition, irqentry_exit() might schedule, which +also requires that HARDIRQ_OFFSET has been removed from the preemption count. + +NMI and NMI-like exceptions +--------------------------- + +NMIs and NMI-like exceptions (machine checks, double faults, debug +interrupts, etc.) 
can hit any context and must be extra careful with +the state. + +State changes for debug exceptions and machine-check exceptions depend on +whether these exceptions happened in user-space (breakpoints or watchpoints) or +in kernel mode (code patching). From user-space, they are treated like +interrupts, while from kernel mode they are treated like NMIs. + +NMIs and other NMI-like exceptions handle state transitions without +distinguishing between user-mode and kernel-mode origin. + +The state update on entry is handled in irqentry_nmi_enter() which updates +state in the following order: + + * Preemption counter + * Lockdep + * RCU / Context tracking + * Tracing + +The exit counterpart irqentry_nmi_exit() does the reverse operation in the +reverse order. + +Note that the update of the preemption counter has to be the first +operation on enter and the last operation on exit. The reason is that both +lockdep and RCU rely on in_nmi() returning true in this case. The +preemption count modification in the NMI entry/exit case must not be +traced. + +Architecture-specific code looks like this: + +.. code-block:: c + + noinstr void nmi(struct pt_regs *regs) + { + arch_nmi_enter(regs); + state = irqentry_nmi_enter(regs); + + instrumentation_begin(); + nmi_handler(regs); + instrumentation_end(); + + irqentry_nmi_exit(regs, state); + } + +and for e.g. a debug exception it can look like this: + +..
code-block:: c + + noinstr void debug(struct pt_regs *regs) + { + arch_nmi_enter(regs); + + debug_regs = save_debug_regs(); + + if (user_mode(regs)) { + state = irqentry_enter(regs); + + instrumentation_begin(); + user_mode_debug_handler(regs, debug_regs); + instrumentation_end(); + + irqentry_exit(regs, state); + } else { + state = irqentry_nmi_enter(regs); + + instrumentation_begin(); + kernel_mode_debug_handler(regs, debug_regs); + instrumentation_end(); + + irqentry_nmi_exit(regs, state); + } + } + +There is no combined irqentry_nmi_if_kernel() function available as the +above cannot be handled in an exception-agnostic way. diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst index 5de2c7a4b1b3c..972d46a5ddf62 100644 --- a/Documentation/core-api/index.rst +++ b/Documentation/core-api/index.rst @@ -44,6 +44,14 @@ Library functionality that is used throughout the kernel. timekeeping errseq +Low level entry and exit +======================== + +.. toctree:: + :maxdepth: 1 + + entry + Concurrency primitives ====================== -- GitLab From e3aa43e936d854373d9a75372aefcefebfca208f Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Mon, 10 Jan 2022 11:50:44 +0100 Subject: [PATCH 0055/1586] Documentation: core-api: entry: Add comments about nesting The topic of nesting and reentrancy in the context of early entry code hasn't been addressed so far. So do it. Signed-off-by: Nicolas Saenz Julienne Reviewed-by: Frederic Weisbecker Reviewed-by: Paul E. McKenney Link: https://lore.kernel.org/r/20220110105044.94423-2-nsaenzju@redhat.com Signed-off-by: Jonathan Corbet --- Documentation/core-api/entry.rst | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/Documentation/core-api/entry.rst b/Documentation/core-api/entry.rst index c6f8e22c88fe8..e12f22ab33c7b 100644 --- a/Documentation/core-api/entry.rst +++ b/Documentation/core-api/entry.rst @@ -105,6 +105,8 @@ has to do extra work between the various steps. 
In such cases it has to ensure that enter_from_user_mode() is called first on entry and exit_to_user_mode() is called last on exit. +Do not nest syscalls. Nested syscalls will cause RCU and/or context tracking +to print a warning. KVM --- @@ -121,6 +123,8 @@ Task work handling is done separately for guest at the boundary of the vcpu_run() loop via xfer_to_guest_mode_handle_work() which is a subset of the work handled on return to user space. +Do not nest KVM entry/exit transitions because doing so is nonsensical. + Interrupts and regular exceptions --------------------------------- @@ -180,6 +184,16 @@ before it handles soft interrupts, whose handlers must run in BH context rather than irq-disabled context. In addition, irqentry_exit() might schedule, which also requires that HARDIRQ_OFFSET has been removed from the preemption count. +Even though interrupt handlers are expected to run with local interrupts +disabled, interrupt nesting is common from an entry/exit perspective. For +example, softirq handling happens within an irqentry_{enter,exit}() block with +local interrupts enabled. Also, although uncommon, nothing prevents an +interrupt handler from re-enabling interrupts. + +Interrupt entry/exit code doesn't strictly need to handle reentrancy, since it +runs with local interrupts disabled. But NMIs can happen anytime, and a lot of +the entry code is shared between the two. + NMI and NMI-like exceptions --------------------------- @@ -259,3 +273,7 @@ and for e.g. a debug exception it can look like this: There is no combined irqentry_nmi_if_kernel() function available as the above cannot be handled in an exception-agnostic way. + +NMIs can happen in any context. For example, an NMI-like exception can be triggered +while handling an NMI. So NMI entry code has to be reentrant and state updates +need to handle nesting.
-- GitLab From 9c3519d2b50968ded1373e872fcc34ca3d748007 Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Tue, 11 Jan 2022 10:13:14 +0800 Subject: [PATCH 0056/1586] docs/zh_CN: add vm/index translation Translate ../vm/index.rst into Chinese. Signed-off-by: Yanteng Si Reviewed-by: Tang Yizhou Reviewed-by: Alex Shi Link: https://lore.kernel.org/r/60c579b34792c4c76194c4843a695263a982b37d.1641866889.git.siyanteng@loongson.cn Signed-off-by: Jonathan Corbet --- Documentation/translations/zh_CN/index.rst | 2 +- Documentation/translations/zh_CN/vm/index.rst | 52 +++++++++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 Documentation/translations/zh_CN/vm/index.rst diff --git a/Documentation/translations/zh_CN/index.rst b/Documentation/translations/zh_CN/index.rst index 08144252630c3..f831887eacfb1 100644 --- a/Documentation/translations/zh_CN/index.rst +++ b/Documentation/translations/zh_CN/index.rst @@ -113,6 +113,7 @@ TODOList: sound/index filesystems/index scheduler/index + vm/index TODOList: @@ -140,7 +141,6 @@ TODOList: * gpu/index * security/index * crypto/index -* vm/index * bpf/index * usb/index * PCI/index diff --git a/Documentation/translations/zh_CN/vm/index.rst b/Documentation/translations/zh_CN/vm/index.rst new file mode 100644 index 0000000000000..0fda40c602ac8 --- /dev/null +++ b/Documentation/translations/zh_CN/vm/index.rst @@ -0,0 +1,52 @@ +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/vm/index.rst + +:翻译: + + 司延腾 Yanteng Si + +:校译: + +================= +Linux内存管理文档 +================= + +这是一个关于Linux内存管理(mm)子系统内部的文档集,其中有不同层次的细节,包括注释 +和邮件列表的回复,用于阐述数据结构和算法的基本情况。如果你正在寻找关于简单分配内存的建 +议,请参阅(Documentation/translations/zh_CN/core-api/memory-allocation.rst)。 +对于控制和调整指南,请参阅(Documentation/admin-guide/mm/index)。 +TODO:待引用文档集被翻译完毕后请及时修改此处) + +.. 
toctree:: + :maxdepth: 1 + + +TODOLIST: +* active_mm +* arch_pgtable_helpers +* balance +* damon/index +* free_page_reporting +* frontswap +* highmem +* hmm +* hwpoison +* hugetlbfs_reserv +* ksm +* memory-model +* mmu_notifier +* numa +* overcommit-accounting +* page_migration +* page_frags +* page_owner +* page_table_check +* remap_file_pages +* slub +* split_page_table_lock +* transhuge +* unevictable-lru +* vmalloced-kernel-stacks +* z3fold +* zsmalloc -- GitLab From 6f5dbb213c140f04fc6d49b70b178ee11333c5f3 Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Tue, 11 Jan 2022 10:13:15 +0800 Subject: [PATCH 0057/1586] docs_zh_CN: add active_mm translation Translate .../vm/active_mm.rst into Chinese. Signed-off-by: Yanteng Si Reviewed-by: Alex Shi Reviewed-by: Tang Yizhou Link: https://lore.kernel.org/r/99ba014bdd9550bad57db6c21653cb7314d7c2d8.1641866889.git.siyanteng@loongson.cn Signed-off-by: Jonathan Corbet --- .../translations/zh_CN/vm/active_mm.rst | 85 +++++++++++++++++++ Documentation/translations/zh_CN/vm/index.rst | 2 +- 2 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 Documentation/translations/zh_CN/vm/active_mm.rst diff --git a/Documentation/translations/zh_CN/vm/active_mm.rst b/Documentation/translations/zh_CN/vm/active_mm.rst new file mode 100644 index 0000000000000..366609ea4f375 --- /dev/null +++ b/Documentation/translations/zh_CN/vm/active_mm.rst @@ -0,0 +1,85 @@ +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/vm/active_mm.rst + +:翻译: + + 司延腾 Yanteng Si + +:校译: + + +========= +Active MM +========= + +这是一封linux之父回复开发者的一封邮件,所以翻译时我尽量保持邮件格式的完整。 + +:: + + List: linux-kernel + Subject: Re: active_mm + From: Linus Torvalds + Date: 1999-07-30 21:36:24 + + 因为我并不经常写解释,所以已经抄送到linux-kernel邮件列表,而当我做这些, + 且更多的人在阅读它们时,我觉得棒极了。 + + 1999年7月30日 星期五, David Mosberger 写道: + > + > 是否有一个简短的描述,说明task_struct中的 + > "mm" 和 "active_mm"应该如何使用?
(如果 + > 这个问题在邮件列表中讨论过,我表示歉意--我刚 + > 刚度假回来,有一段时间没能关注linux-kernel了)。 + + 基本上,新的设定是: + + - 我们有“真实地址空间”和“匿名地址空间”。区别在于,匿名地址空间根本不关心用 + 户级页表,所以当我们做上下文切换到匿名地址空间时,我们只是让以前的地址空间 + 处于活动状态。 + + 一个“匿名地址空间”的明显用途是任何不需要任何用户映射的线程--所有的内核线 + 程基本上都属于这一类,但即使是“真正的”线程也可以暂时说在一定时间内它们不 + 会对用户空间感兴趣,调度器不妨试着避免在切换VM状态上浪费时间。目前只有老 + 式的bdflush sync能做到这一点。 + + - “tsk->mm” 指向 “真实地址空间”。对于一个匿名进程来说,tsk->mm将是NULL, + 其逻辑原因是匿名进程实际上根本就 “没有” 真正的地址空间。 + + - 然而,我们显然需要跟踪我们为这样的匿名用户“偷用”了哪个地址空间。为此,我们 + 有 “tsk->active_mm”,它显示了当前活动的地址空间是什么。 + + 规则是,对于一个有真实地址空间的进程(即tsk->mm是 non-NULL),active_mm + 显然必须与真实的mm相同。 + + 对于一个匿名进程,tsk->mm == NULL,而tsk->active_mm是匿名进程运行时 + “借用”的mm。当匿名进程被调度走时,借用的地址空间被返回并清除。 + + 为了支持所有这些,“struct mm_struct”现在有两个计数器:一个是 “mm_users” + 计数器,即有多少 “真正的地址空间用户”,另一个是 “mm_count”计数器,即 “lazy” + 用户(即匿名用户)的数量,如果有任何真正的用户,则加1。 + + 通常情况下,至少有一个真正的用户,但也可能是真正的用户在另一个CPU上退出,而 + 一个lazy的用户仍在活动,所以你实际上得到的情况是,你有一个地址空间 **只** + 被lazy的用户使用。这通常是一个短暂的生命周期状态,因为一旦这个线程被安排给一 + 个真正的线程,这个 “僵尸” mm就会被释放,因为 “mm_count”变成了零。 + + 另外,一个新的规则是,**没有人** 再把 “init_mm” 作为一个真正的MM了。 + “init_mm”应该被认为只是一个 “没有其他上下文时的lazy上下文”,事实上,它主 + 要是在启动时使用,当时还没有真正的VM被创建。因此,用来检查的代码 + + if (current->mm == &init_mm) + + 一般来说,应该用 + + if (!current->mm) + + 取代上面的写法(这更有意义--测试基本上是 “我们是否有一个用户环境”,并且通常 + 由缺页异常处理程序和类似的东西来完成)。 + + 总之,我刚才在ftp.kernel.org上放了一个pre-patch-2.3.13-1,因为它稍微改 + 变了接口以适配alpha(谁会想到呢,但alpha体系结构上下文切换代码实际上最终是 + 最丑陋的之一--不像其他架构的MM和寄存器状态是分开的,alpha的PALcode将两者 + 连接起来,你需要同时切换两者)。 + + (文档来源 http://marc.info/?l=linux-kernel&m=93337278602211&w=2) diff --git a/Documentation/translations/zh_CN/vm/index.rst b/Documentation/translations/zh_CN/vm/index.rst index 0fda40c602ac8..5fcdb75d8682e 100644 --- a/Documentation/translations/zh_CN/vm/index.rst +++ b/Documentation/translations/zh_CN/vm/index.rst @@ -21,9 +21,9 @@ TODO:待引用文档集被翻译完毕后请及时修改此处) .. 
toctree:: :maxdepth: 1 + active_mm TODOLIST: -* active_mm * arch_pgtable_helpers * balance * damon/index -- GitLab From 88ba790d84e98b6f030c8efc8d3e9f042c0ab777 Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Tue, 11 Jan 2022 10:13:16 +0800 Subject: [PATCH 0058/1586] docs/zh_CN: add balance translation Translate .../vm/balance.rst into Chinese. Signed-off-by: Yanteng Si Reviewed-by: Alex Shi Reviewed-by: Tang Yizhou Link: https://lore.kernel.org/r/f1e5a6bcbee3e34fe65a56ee185d9b44daf01cab.1641866889.git.siyanteng@loongson.cn Signed-off-by: Jonathan Corbet --- .../translations/zh_CN/vm/balance.rst | 81 +++++++++++++++++++ Documentation/translations/zh_CN/vm/index.rst | 2 +- 2 files changed, 82 insertions(+), 1 deletion(-) create mode 100644 Documentation/translations/zh_CN/vm/balance.rst diff --git a/Documentation/translations/zh_CN/vm/balance.rst b/Documentation/translations/zh_CN/vm/balance.rst new file mode 100644 index 0000000000000..e98a47ef24a8f --- /dev/null +++ b/Documentation/translations/zh_CN/vm/balance.rst @@ -0,0 +1,81 @@ +.. 
include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/vm/balance.rst + +:翻译: + + 司延腾 Yanteng Si + +:校译: + + +======== +内存平衡 +======== + +2000年1月开始,作者:Kanoj Sarcar + +对于 !__GFP_HIGH 和 !__GFP_KSWAPD_RECLAIM 以及非 __GFP_IO 的分配,需要进行 +内存平衡。 + +调用者避免回收的第一个原因是调用者由于持有自旋锁或处于中断环境中而无法睡眠。第二个 +原因可能是,调用者愿意在不产生页面回收开销的情况下分配失败。这可能发生在有0阶回退 +选项的机会主义高阶分配请求中。在这种情况下,调用者可能也希望避免唤醒kswapd。 + +__GFP_IO分配请求是为了防止文件系统死锁。 + +在没有非睡眠分配请求的情况下,做平衡似乎是有害的。页面回收可以被懒散地启动,也就是 +说,只有在需要的时候(也就是区域的空闲内存为0),而不是让它成为一个主动的过程。 + +也就是说,内核应该尝试从直接映射池中满足对直接映射页的请求,而不是回退到dma池中, +这样就可以保持dma池为dma请求(不管是不是原子的)所填充。类似的争论也适用于高内存 +和直接映射的页面。相反,如果有很多空闲的dma页,最好是通过从dma池中分配一个来满足 +常规的内存请求,而不是产生常规区域平衡的开销。 + +在2.2中,只有当空闲页总数低于总内存的1/64时,才会启动内存平衡/页面回收。如果dma +和常规内存的比例合适,即使dma区完全空了,也很可能不会进行平衡。2.2已经在不同内存 +大小的生产机器上运行,即使有这个问题存在,似乎也做得不错。在2.3中,由于HIGHMEM的 +存在,这个问题变得更加严重。 + +在2.3中,区域平衡可以用两种方式之一来完成:根据区域的大小(可能是低级区域的大小), +我们可以在初始化阶段决定在平衡任何区域时应该争取多少空闲页。好的方面是,在平衡的时 +候,我们不需要看低级区的大小,坏的方面是,我们可能会因为忽略低级区可能较低的使用率 +而做过于频繁的平衡。另外,只要对分配程序稍作修改,就有可能将memclass()宏简化为一 +个简单的等式。 + +另一个可能的解决方案是,我们只在一个区 **和** 其所有低级区的空闲内存低于该区及其 +低级区总内存的1/64时进行平衡。这就解决了2.2的平衡问题,并尽可能地保持了与2.2行为 +的接近。另外,平衡算法在各种架构上的工作方式也是一样的,这些架构有不同数量和类型的 +内存区。如果我们想变得更花哨一点,我们可以在未来为不同区域的自由页面分配不同的权重。 + +请注意,如果普通区的大小与dma区相比是巨大的,那么在决定是否平衡普通区的时候,考虑 +空闲的dma页就变得不那么重要了。那么第一个解决方案就变得更有吸引力。 + +所附的补丁实现了第二个解决方案。它还 “修复”了两个问题:首先,在低内存条件下,kswapd +被唤醒,就像2.2中的非睡眠分配。第二,HIGHMEM区也被平衡了,以便给replace_with_highmem() +一个争取获得HIGHMEM页的机会,同时确保HIGHMEM分配不会落回普通区。这也确保了HIGHMEM +页不会被泄露(例如,在一个HIGHMEM页在交换缓存中但没有被任何人使用的情况下)。 + +kswapd还需要知道它应该平衡哪些区。kswapd主要是在无法进行平衡的情况下需要的,可能 +是因为所有的分配请求都来自中断上下文,而所有的进程上下文都在睡眠。对于2.3, +kswapd并不真正需要平衡高内存区,因为中断上下文并不请求高内存页。kswapd看zone +结构体中的zone_wake_kswapd字段来决定一个区是否需要平衡。 + +如果从进程内存和shm中偷取页面可以减轻该页面节点中任何区的内存压力,而该区的内存压力 +已经低于其水位,则会进行偷取。 + +watemark[WMARK_MIN/WMARK_LOW/WMARK_HIGH]/low_on_memory/zone_wake_kswapd: +这些是每个区的字段,用于确定一个区何时需要平衡。当页面数低于水位[WMARK_MIN]时, +hysteric 的字段low_on_memory被设置。这个字段会一直被设置,直到空闲页数变成水位 +[WMARK_HIGH]。当low_on_memory被设置时,页面分配请求将尝试释放该区域的一些页面(如果 +请求中设置了GFP_WAIT)。与此相反的是,决定唤醒kswapd以释放一些区的页。这个决定不是基于 
+hysteresis 的,而是当空闲页的数量低于watermark[WMARK_LOW]时就会进行;在这种情况下, +zone_wake_kswapd也被设置。 + + +我所听到的(超棒的)想法: + +1. 动态经历应该影响平衡:可以跟踪一个区的失败请求的数量,并反馈到平衡方案中(jalvo@mbay.net)。 + +2. 实现一个类似于replace_with_highmem()的replace_with_regular(),以保留dma页面。 + (lkd@tantalophile.demon.co.uk) diff --git a/Documentation/translations/zh_CN/vm/index.rst b/Documentation/translations/zh_CN/vm/index.rst index 5fcdb75d8682e..fc39f0d60318e 100644 --- a/Documentation/translations/zh_CN/vm/index.rst +++ b/Documentation/translations/zh_CN/vm/index.rst @@ -22,10 +22,10 @@ TODO:待引用文档集被翻译完毕后请及时修改此处) :maxdepth: 1 active_mm + balance TODOLIST: * arch_pgtable_helpers -* balance * damon/index * free_page_reporting * frontswap -- GitLab From 2701b511e491f95d829a340c2540d602db5c63d8 Mon Sep 17 00:00:00 2001 From: wangyong Date: Wed, 12 Jan 2022 08:04:42 -0800 Subject: [PATCH 0059/1586] docs/zh_CN: Update zh_CN/accounting/delay-accounting.rst Update zh_CN/accounting/delay-accounting.rst. The document modification has been merged which refers to the following link: https://lore.kernel.org/all/1639583021-92977-1-git-send-email-wang.yong12@zte.com.cn/ Signed-off-by: wangyong Reviewed-by: Yang Yang Reviewed-by: Alex Shi Link: https://lore.kernel.org/r/1642003482-48935-1-git-send-email-wang.yong12@zte.com.cn Signed-off-by: Jonathan Corbet --- .../zh_CN/accounting/delay-accounting.rst | 62 +++++++++---------- 1 file changed, 30 insertions(+), 32 deletions(-) diff --git a/Documentation/translations/zh_CN/accounting/delay-accounting.rst b/Documentation/translations/zh_CN/accounting/delay-accounting.rst index 67d5606e54010..f1849411018e9 100644 --- a/Documentation/translations/zh_CN/accounting/delay-accounting.rst +++ b/Documentation/translations/zh_CN/accounting/delay-accounting.rst @@ -17,6 +17,8 @@ a) 等待一个CPU(任务为可运行) b) 完成由该任务发起的块I/O同步请求 c) 页面交换 d) 内存回收 +e) 页缓存抖动 +f) 直接规整 并将这些统计信息通过taskstats接口提供给用户空间。 @@ -37,10 +39,10 @@ d) 内存回收 向用户态返回一个通用数据结构,对应每pid或每tgid的统计信息。延时计数功能填写 该数据结构的特定字段。见 - include/linux/taskstats.h + 
include/uapi/linux/taskstats.h 其描述了延时计数相关字段。系统通常以计数器形式返回 CPU、同步块 I/O、交换、内存 -回收等的累积延时。 +回收、页缓存抖动、直接规整等的累积延时。 取任务某计数器两个连续读数的差值,将得到任务在该时间间隔内等待对应资源的总延时。 @@ -72,40 +74,36 @@ kernel.task_delayacct进行开关。注意,只有在启用延时计数后启 getdelays命令的一般格式:: - getdelays [-t tgid] [-p pid] [-c cmd...] + getdelays [-dilv] [-t tgid] [-p pid] 获取pid为10的任务从系统启动后的延时信息:: - # ./getdelays -p 10 + # ./getdelays -d -p 10 (输出信息和下例相似) 获取所有tgid为5的任务从系统启动后的总延时信息:: - # ./getdelays -t 5 - - - CPU count real total virtual total delay total - 7876 92005750 100000000 24001500 - IO count delay total - 0 0 - SWAP count delay total - 0 0 - RECLAIM count delay total - 0 0 - -获取指定简单命令运行时的延时信息:: - - # ./getdelays -c ls / - - bin data1 data3 data5 dev home media opt root srv sys usr - boot data2 data4 data6 etc lib mnt proc sbin subdomain tmp var - - - CPU count real total virtual total delay total - 6 4000250 4000000 0 - IO count delay total - 0 0 - SWAP count delay total - 0 0 - RECLAIM count delay total - 0 0 + # ./getdelays -d -t 5 + print delayacct stats ON + TGID 5 + + + CPU count real total virtual total delay total delay average + 8 7000000 6872122 3382277 0.423ms + IO count delay total delay average + 0 0 0ms + SWAP count delay total delay average + 0 0 0ms + RECLAIM count delay total delay average + 0 0 0ms + THRASHING count delay total delay average + 0 0 0ms + COMPACT count delay total delay average + 0 0 0ms + +获取pid为1的IO计数,它只和-p一起使用:: + # ./getdelays -i -p 1 + printing IO accounting + linuxrc: read=65536, write=0, cancelled_write=0 + +上面的命令与-v一起使用,可以获取更多调试信息。 -- GitLab From e2d99027da08a816ba63d0b8335dbde0ec1152c6 Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Sat, 15 Jan 2022 15:15:00 +0800 Subject: [PATCH 0060/1586] docs/zh_CN: add damon index tronslation 1) Translate .../vm/damon/index.rst into Chinese. 
2) add damon into .../zh_CN/vm/index.rst Signed-off-by: Yanteng Si Reviewed-by: Alex Shi Link: https://lore.kernel.org/r/8e8c7651785f1ce20766bc1b3a4fc44faedb84bb.1642230669.git.siyanteng@loongson.cn Signed-off-by: Jonathan Corbet --- .../translations/zh_CN/vm/damon/index.rst | 33 +++++++++++++++++++ Documentation/translations/zh_CN/vm/index.rst | 2 +- 2 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 Documentation/translations/zh_CN/vm/damon/index.rst diff --git a/Documentation/translations/zh_CN/vm/damon/index.rst b/Documentation/translations/zh_CN/vm/damon/index.rst new file mode 100644 index 0000000000000..9a7b4ea91cb82 --- /dev/null +++ b/Documentation/translations/zh_CN/vm/damon/index.rst @@ -0,0 +1,33 @@ +.. SPDX-License-Identifier: GPL-2.0 + +:Original: Documentation/vm/damon/index.rst + +:翻译: + + 司延腾 Yanteng Si + +:校译: + + +========================== +DAMON:数据访问监视器 +========================== + +DAMON是Linux内核的一个数据访问监控框架子系统。DAMON的核心机制使其成为 +(该核心机制详见(Documentation/translations/zh_CN/vm/damon/design.rst)) + + - *准确度* (监测输出对DRAM级别的内存管理足够有用;但可能不适合CPU Cache级别), + - *轻量级* (监控开销低到可以在线应用),以及 + - *可扩展* (无论目标工作负载的大小,开销的上限值都在恒定范围内)。 + +因此,利用这个框架,内核的内存管理机制可以做出高级决策。会导致高数据访问监控开销的实 +验性内存管理优化工作可以再次进行。同时,在用户空间,有一些特殊工作负载的用户可以编写 +个性化的应用程序,以便更好地了解和优化他们的工作负载和系统。 + +.. 
toctree:: + :maxdepth: 2 + +TODOLIST: +* faq +* design +* api diff --git a/Documentation/translations/zh_CN/vm/index.rst b/Documentation/translations/zh_CN/vm/index.rst index fc39f0d60318e..432bc3ef1c18c 100644 --- a/Documentation/translations/zh_CN/vm/index.rst +++ b/Documentation/translations/zh_CN/vm/index.rst @@ -23,10 +23,10 @@ TODO:待引用文档集被翻译完毕后请及时修改此处) active_mm balance + damon/index TODOLIST: * arch_pgtable_helpers -* damon/index * free_page_reporting * frontswap * highmem -- GitLab From 3fd8816219311289eaec2eb9bc389146a553fe4e Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Sat, 15 Jan 2022 15:15:01 +0800 Subject: [PATCH 0061/1586] docs/zh_CN: add damon faq translation Translate .../vm/damon/faq.rst into Chinese. Signed-off-by: Yanteng Si Reviewed-by: Alex Shi Link: https://lore.kernel.org/r/30507f807a835360f57bb9498c37f4c3644b33b7.1642230669.git.siyanteng@loongson.cn Signed-off-by: Jonathan Corbet --- .../translations/zh_CN/vm/damon/faq.rst | 48 +++++++++++++++++++ .../translations/zh_CN/vm/damon/index.rst | 4 +- 2 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 Documentation/translations/zh_CN/vm/damon/faq.rst diff --git a/Documentation/translations/zh_CN/vm/damon/faq.rst b/Documentation/translations/zh_CN/vm/damon/faq.rst new file mode 100644 index 0000000000000..07b4ac19407de --- /dev/null +++ b/Documentation/translations/zh_CN/vm/damon/faq.rst @@ -0,0 +1,48 @@ +.. SPDX-License-Identifier: GPL-2.0 + +:Original: Documentation/vm/damon/faq.rst + +:翻译: + + 司延腾 Yanteng Si + +:校译: + + +======== +常见问题 +======== + +为什么是一个新的子系统,而不是扩展perf或其他用户空间工具? +========================================================== + +首先,因为它需要尽可能的轻量级,以便可以在线使用,所以应该避免任何不必要的开销,如内核-用户 +空间的上下文切换成本。第二,DAMON的目标是被包括内核在内的其他程序所使用。因此,对特定工具 +(如perf)的依赖性是不可取的。这就是DAMON在内核空间实现的两个最大的原因。 + + +“闲置页面跟踪” 或 “perf mem” 可以替代DAMON吗? 
+============================================== + +闲置页跟踪是物理地址空间访问检查的一个低层次的原始方法。“perf mem”也是类似的,尽管它可以 +使用采样来减少开销。另一方面,DAMON是一个更高层次的框架,用于监控各种地址空间。它专注于内 +存管理优化,并提供复杂的精度/开销处理机制。因此,“空闲页面跟踪” 和 “perf mem” 可以提供 +DAMON输出的一个子集,但不能替代DAMON。 + + +DAMON是否只支持虚拟内存? +========================= + +不,DAMON的核心是独立于地址空间的。用户可以在DAMON核心上实现和配置特定地址空间的低级原始 +部分,包括监测目标区域的构造和实际的访问检查。通过这种方式,DAMON用户可以用任何访问检查技 +术来监测任何地址空间。 + +尽管如此,DAMON默认为虚拟内存和物理内存提供了基于vma/rmap跟踪和PTE访问位检查的地址空间 +相关功能的实现,以供参考和方便使用。 + + +我可以简单地监测页面的粒度吗? +============================== + +是的,你可以通过设置 ``min_nr_regions`` 属性高于工作集大小除以页面大小的值来实现。 +因为监视目标区域的大小被强制为 ``>=page size`` ,所以区域分割不会产生任何影响。 diff --git a/Documentation/translations/zh_CN/vm/damon/index.rst b/Documentation/translations/zh_CN/vm/damon/index.rst index 9a7b4ea91cb82..dfa82244cbe8c 100644 --- a/Documentation/translations/zh_CN/vm/damon/index.rst +++ b/Documentation/translations/zh_CN/vm/damon/index.rst @@ -27,7 +27,9 @@ DAMON是Linux内核的一个数据访问监控框架子系统。DAMON的核心 .. toctree:: :maxdepth: 2 + faq + TODOLIST: -* faq + * design * api -- GitLab From 18e74934dc4a65ff1dfb24b83f9778ded0f247dd Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Sat, 15 Jan 2022 15:15:02 +0800 Subject: [PATCH 0062/1586] docs/zh_CN: add damon design translation Translate .../vm/damon/design.rst into Chinese. Signed-off-by: Yanteng Si Reviewed-by: Alex Shi Link: https://lore.kernel.org/r/2685ed7d446620b260c20158685728c3adb5e0fe.1642230669.git.siyanteng@loongson.cn Signed-off-by: Jonathan Corbet --- .../translations/zh_CN/vm/damon/design.rst | 139 ++++++++++++++++++ .../translations/zh_CN/vm/damon/index.rst | 2 +- 2 files changed, 140 insertions(+), 1 deletion(-) create mode 100644 Documentation/translations/zh_CN/vm/damon/design.rst diff --git a/Documentation/translations/zh_CN/vm/damon/design.rst b/Documentation/translations/zh_CN/vm/damon/design.rst new file mode 100644 index 0000000000000..05f66c02740a9 --- /dev/null +++ b/Documentation/translations/zh_CN/vm/damon/design.rst @@ -0,0 +1,139 @@ +.. 
SPDX-License-Identifier: GPL-2.0 + +:Original: Documentation/vm/damon/design.rst + +:翻译: + + 司延腾 Yanteng Si + +:校译: + + +==== +设计 +==== + +可配置的层 +========== + +DAMON提供了数据访问监控功能,同时使其准确性和开销可控。基本的访问监控需要依赖于目标地址空间 +并为之优化的基元。另一方面,作为DAMON的核心,准确性和开销的权衡机制是在纯逻辑空间中。DAMON +将这两部分分离在不同的层中,并定义了它的接口,以允许各种低层次的基元实现与核心逻辑的配置。 + +由于这种分离的设计和可配置的接口,用户可以通过配置核心逻辑和适当的低级基元实现来扩展DAMON的 +任何地址空间。如果没有提供合适的,用户可以自己实现基元。 + +例如,物理内存、虚拟内存、交换空间、那些特定的进程、NUMA节点、文件和支持的内存设备将被支持。 +另外,如果某些架构或设备支持特殊的优化访问检查基元,这些基元将很容易被配置。 + + +特定地址空间基元的参考实现 +========================== + +基本访问监测的低级基元被定义为两部分。: + +1. 确定地址空间的监测目标地址范围 +2. 目标空间中特定地址范围的访问检查。 + +DAMON目前为物理和虚拟地址空间提供了基元的实现。下面两个小节描述了这些工作的方式。 + + +基于VMA的目标地址范围构造 +------------------------- + +这仅仅是针对虚拟地址空间基元的实现。对于物理地址空间,只是要求用户手动设置监控目标地址范围。 + +在进程的超级巨大的虚拟地址空间中,只有小部分被映射到物理内存并被访问。因此,跟踪未映射的地 +址区域只是一种浪费。然而,由于DAMON可以使用自适应区域调整机制来处理一定程度的噪声,所以严 +格来说,跟踪每一个映射并不是必须的,但在某些情况下甚至会产生很高的开销。也就是说,监测目标 +内部过于巨大的未映射区域应该被移除,以不占用自适应机制的时间。 + +出于这个原因,这个实现将复杂的映射转换为三个不同的区域,覆盖地址空间的每个映射区域。这三个 +区域之间的两个空隙是给定地址空间中两个最大的未映射区域。这两个最大的未映射区域是堆和最上面 +的mmap()区域之间的间隙,以及在大多数情况下最下面的mmap()区域和堆之间的间隙。因为这些间隙 +在通常的地址空间中是异常巨大的,排除这些间隙就足以做出合理的权衡。下面详细说明了这一点:: + + + + + (small mmap()-ed regions and munmap()-ed regions) + + + + + +基于PTE访问位的访问检查 +----------------------- + +物理和虚拟地址空间的实现都使用PTE Accessed-bit进行基本访问检查。唯一的区别在于从地址中 +找到相关的PTE访问位的方式。虚拟地址的实现是为该地址的目标任务查找页表,而物理地址的实现则 +是查找与该地址有映射关系的每一个页表。通过这种方式,实现者找到并清除下一个采样目标地址的位, +并检查该位是否在一个采样周期后再次设置。这可能会干扰其他使用访问位的内核子系统,即空闲页跟 +踪和回收逻辑。为了避免这种干扰,DAMON使其与空闲页面跟踪相互排斥,并使用 ``PG_idle`` 和 +``PG_young`` 页面标志来解决与回收逻辑的冲突,就像空闲页面跟踪那样。 + + +独立于地址空间的核心机制 +======================== + +下面四个部分分别描述了DAMON的核心机制和五个监测属性,即 ``采样间隔`` 、 ``聚集间隔`` 、 +``区域更新间隔`` 、 ``最小区域数`` 和 ``最大区域数`` 。 + + +访问频率监测 +------------ + +DAMON的输出显示了在给定的时间内哪些页面的访问频率是多少。访问频率的分辨率是通过设置 +``采样间隔`` 和 ``聚集间隔`` 来控制的。详细地说,DAMON检查每个 ``采样间隔`` 对每 +个页面的访问,并将结果汇总。换句话说,计算每个页面的访问次数。在每个 ``聚合间隔`` 过 +去后,DAMON调用先前由用户注册的回调函数,以便用户可以阅读聚合的结果,然后再清除这些结 +果。这可以用以下简单的伪代码来描述:: + + while monitoring_on: + for page in monitoring_target: + if accessed(page): + nr_accesses[page] += 
1 + if time() % aggregation_interval == 0: + for callback in user_registered_callbacks: + callback(monitoring_target, nr_accesses) + for page in monitoring_target: + nr_accesses[page] = 0 + sleep(sampling interval) + +这种机制的监测开销将随着目标工作负载规模的增长而任意增加。 + + +基于区域的抽样调查 +------------------ + +为了避免开销的无限制增加,DAMON将假定具有相同访问频率的相邻页面归入一个区域。只要保持 +这个假设(一个区域内的页面具有相同的访问频率),该区域内就只需要检查一个页面。因此,对 +于每个 ``采样间隔`` ,DAMON在每个区域中随机挑选一个页面,等待一个 ``采样间隔`` ,检 +查该页面是否同时被访问,如果被访问则增加该区域的访问频率。因此,监测开销是可以通过设置 +区域的数量来控制的。DAMON允许用户设置最小和最大的区域数量来进行权衡。 + +然而,如果假设没有得到保证,这个方案就不能保持输出的质量。 + + +适应性区域调整 +-------------- + +即使最初的监测目标区域被很好地构建以满足假设(同一区域内的页面具有相似的访问频率),数 +据访问模式也会被动态地改变。这将导致监测质量下降。为了尽可能地保持假设,DAMON根据每个 +区域的访问频率自适应地进行合并和拆分。 + +对于每个 ``聚集区间`` ,它比较相邻区域的访问频率,如果频率差异较小,就合并这些区域。 +然后,在它报告并清除每个区域的聚合接入频率后,如果区域总数不超过用户指定的最大区域数, +它将每个区域拆分为两个或三个区域。 + +通过这种方式,DAMON提供了其最佳的质量和最小的开销,同时保持了用户为其权衡设定的界限。 + + +动态目标空间更新处理 +-------------------- + +监测目标地址范围可以动态改变。例如,虚拟内存可以动态地被映射和解映射。物理内存可以被 +热插拔。 + +由于在某些情况下变化可能相当频繁,DAMON检查动态内存映射的变化,并仅在用户指定的时间 +间隔( ``区域更新间隔`` )内将其应用于抽象的目标区域。 diff --git a/Documentation/translations/zh_CN/vm/damon/index.rst b/Documentation/translations/zh_CN/vm/damon/index.rst index dfa82244cbe8c..077db7e4326f0 100644 --- a/Documentation/translations/zh_CN/vm/damon/index.rst +++ b/Documentation/translations/zh_CN/vm/damon/index.rst @@ -28,8 +28,8 @@ DAMON是Linux内核的一个数据访问监控框架子系统。DAMON的核心 :maxdepth: 2 faq + design TODOLIST: -* design * api -- GitLab From 722cc663d79c80051cbd57de0336582c8e3cbf93 Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Sat, 15 Jan 2022 15:15:03 +0800 Subject: [PATCH 0063/1586] docs/zh_CN: add damon api translation Translate .../vm/damon/api.rst into Chinese. 
Signed-off-by: Yanteng Si Reviewed-by: Alex Shi Link: https://lore.kernel.org/r/3d81dbc79e0f982edd68fb9dfee6f0ccb47f7710.1642230669.git.siyanteng@loongson.cn Signed-off-by: Jonathan Corbet --- .../translations/zh_CN/vm/damon/api.rst | 32 +++++++++++++++++++ .../translations/zh_CN/vm/damon/index.rst | 4 +-- 2 files changed, 33 insertions(+), 3 deletions(-) create mode 100644 Documentation/translations/zh_CN/vm/damon/api.rst diff --git a/Documentation/translations/zh_CN/vm/damon/api.rst b/Documentation/translations/zh_CN/vm/damon/api.rst new file mode 100644 index 0000000000000..21143eea4ebed --- /dev/null +++ b/Documentation/translations/zh_CN/vm/damon/api.rst @@ -0,0 +1,32 @@ +.. SPDX-License-Identifier: GPL-2.0 + +:Original: Documentation/vm/damon/api.rst + +:翻译: + + 司延腾 Yanteng Si + +:校译: + + +======= +API参考 +======= + +内核空间的程序可以使用下面的API来使用DAMON的每个功能。你所需要做的就是引用 ``damon.h`` , +它位于源代码树的include/linux/。 + +结构体 +====== + +该API在以下内核代码中: + +include/linux/damon.h + + +函数 +==== + +该API在以下内核代码中: + +mm/damon/core.c diff --git a/Documentation/translations/zh_CN/vm/damon/index.rst b/Documentation/translations/zh_CN/vm/damon/index.rst index 077db7e4326f0..84d36d90c9b0f 100644 --- a/Documentation/translations/zh_CN/vm/damon/index.rst +++ b/Documentation/translations/zh_CN/vm/damon/index.rst @@ -29,7 +29,5 @@ DAMON是Linux内核的一个数据访问监控框架子系统。DAMON的核心 faq design + api -TODOLIST: - -* api -- GitLab From adeacecbd36cb820f65e93194baba17decdded32 Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Thu, 27 Jan 2022 09:47:53 +0800 Subject: [PATCH 0064/1586] docs/zh_CN: add free_page_reporting translation Translate .../vm/free_page_reporting.rst into Chinese. 
Signed-off-by: Yanteng Si Reviewed-by: Alex Shi Link: https://lore.kernel.org/r/4ef77c1be8319fc45b18e9f4c41986095d794562.1643246827.git.siyanteng@loongson.cn Signed-off-by: Jonathan Corbet --- .../zh_CN/vm/free_page_reporting.rst | 38 +++++++++++++++++++ Documentation/translations/zh_CN/vm/index.rst | 2 +- 2 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 Documentation/translations/zh_CN/vm/free_page_reporting.rst diff --git a/Documentation/translations/zh_CN/vm/free_page_reporting.rst b/Documentation/translations/zh_CN/vm/free_page_reporting.rst new file mode 100644 index 0000000000000..31d6c34b956b4 --- /dev/null +++ b/Documentation/translations/zh_CN/vm/free_page_reporting.rst @@ -0,0 +1,38 @@ +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/vm/free_page_reporting.rst + +:翻译: + + 司延腾 Yanteng Si + +:校译: + +========== +空闲页报告 +========== + +空闲页报告是一个API,设备可以通过它来注册接收系统当前未使用的页面列表。这在虚拟 +化的情况下是很有用的,客户机能够使用这些数据来通知管理器它不再使用内存中的某些页 +面。 + +对于驱动,通常是气球驱动要使用这个功能,它将分配和初始化一个page_reporting_dev_info +结构体。它要填充的结构体中的字段是用于处理散点列表的 "report" 函数指针。它还必 +须保证每次调用该函数时能处理至少相当于PAGE_REPORTING_CAPACITY的散点列表条目。 +假设没有其他页面报告设备已经注册, 对page_reporting_register的调用将向报告框 +架注册页面报告接口。 + +一旦注册,页面报告API将开始向驱动报告成批的页面。API将在接口被注册后2秒开始报告 +页面,并在任何足够高阶的页面被释放之后2秒继续报告。 + +报告的页面将被存储在传递给报告函数的散点列表中,最后一个条目的结束位被设置在条目 +nent-1中。 当页面被报告函数处理时,分配器将无法访问它们。一旦报告函数完成,这些 +页将被返回到它们所获得的自由区域。 + +在移除使用空闲页报告的驱动之前,有必要调用page_reporting_unregister,以移除 +目前被空闲页报告使用的page_reporting_dev_info结构体。这样做将阻止进一步的报 +告通过该接口发出。如果另一个驱动或同一驱动被注册,它就有可能恢复前一个驱动在报告 +空闲页方面的工作。 + + +Alexander Duyck, 2019年12月04日 diff --git a/Documentation/translations/zh_CN/vm/index.rst b/Documentation/translations/zh_CN/vm/index.rst index 432bc3ef1c18c..13df8321f2426 100644 --- a/Documentation/translations/zh_CN/vm/index.rst +++ b/Documentation/translations/zh_CN/vm/index.rst @@ -24,6 +24,7 @@ TODO:待引用文档集被翻译完毕后请及时修改此处) active_mm balance damon/index + free_page_reporting TODOLIST: * arch_pgtable_helpers @@ -33,7 +34,6 @@ TODOLIST: * hmm * hwpoison *
hugetlbfs_reserv -* ksm * memory-model * mmu_notifier * numa -- GitLab From 4c97fdb06b9884da8682c869303b659a25c0b952 Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Thu, 27 Jan 2022 09:47:54 +0800 Subject: [PATCH 0065/1586] docs/zh_CN: add highmem translation Translate .../vm/highmem.rst into Chinese. Signed-off-by: Yanteng Si Reviewed-by: Alex Shi Link: https://lore.kernel.org/r/51e088d1e8659b9411534a5a3ad03d88c79a5297.1643246827.git.siyanteng@loongson.cn Signed-off-by: Jonathan Corbet --- .../translations/zh_CN/vm/highmem.rst | 128 ++++++++++++++++++ Documentation/translations/zh_CN/vm/index.rst | 2 +- 2 files changed, 129 insertions(+), 1 deletion(-) create mode 100644 Documentation/translations/zh_CN/vm/highmem.rst diff --git a/Documentation/translations/zh_CN/vm/highmem.rst b/Documentation/translations/zh_CN/vm/highmem.rst new file mode 100644 index 0000000000000..018838e58c3e7 --- /dev/null +++ b/Documentation/translations/zh_CN/vm/highmem.rst @@ -0,0 +1,128 @@ +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/vm/highmem.rst + +:翻译: + + 司延腾 Yanteng Si + +:校译: + +========== +高内存处理 +========== + +作者: Peter Zijlstra + +.. contents:: :local: + +高内存是什么? +============== + +当物理内存的大小接近或超过虚拟内存的最大大小时,就会使用高内存(highmem)。在这一点上,内 +核不可能在任何时候都保持所有可用的物理内存的映射。这意味着内核需要开始使用它想访问的物理内 +存的临时映射。 + +没有被永久映射覆盖的那部分(物理)内存就是我们所说的 "高内存"。对于这个边界的确切位置,有 +各种架构上的限制。 + +例如,在i386架构中,我们选择将内核映射到每个进程的虚拟空间,这样我们就不必为内核的进入/退 +出付出全部的TLB作废代价。这意味着可用的虚拟内存空间(i386上为4GiB)必须在用户和内核空间之 +间进行划分。 + +使用这种方法的架构的传统分配方式是3:1,3GiB用于用户空间,顶部的1GiB用于内核空间。:: + + +--------+ 0xffffffff + | Kernel | + +--------+ 0xc0000000 + | | + | User | + | | + +--------+ 0x00000000 + +这意味着内核在任何时候最多可以映射1GiB的物理内存,但是由于我们需要虚拟地址空间来做其他事 +情--包括访问其余物理内存的临时映射--实际的直接映射通常会更少(通常在~896MiB左右)。 + +其他有mm上下文标签的TLB的架构可以有独立的内核和用户映射。然而,一些硬件(如一些ARM)在使 +用mm上下文标签时,其虚拟空间有限。 + + +临时虚拟映射 +============ + +内核包含几种创建临时映射的方法。: + +* vmap(). 这可以用来将多个物理页长期映射到一个连续的虚拟空间。它需要synchronization + 来解除映射。 + +* kmap().
这允许对单个页面进行短期映射。它需要synchronization,但在一定程度上被摊销。 + 当以嵌套方式使用时,它也很容易出现死锁,因此不建议在新代码中使用它。 + +* kmap_atomic(). 这允许对单个页面进行非常短的时间映射。由于映射被限制在发布它的CPU上, + 它表现得很好,但发布任务因此被要求留在该CPU上直到它完成,以免其他任务取代它的映射。 + + kmap_atomic() 也可以由中断上下文使用,因为它不睡眠,而且调用者可能在调用kunmap_atomic() + 之后才睡眠。 + + 可以假设k[un]map_atomic()不会失败。 + + +使用kmap_atomic +=============== + +何时何地使用 kmap_atomic() 是很直接的。当代码想要访问一个可能从高内存(见__GFP_HIGHMEM) +分配的页面的内容时,例如在页缓存中的页面,就会使用它。该API有两个函数,它们的使用方式与 +下面类似:: + + /* 找到感兴趣的页面。 */ + struct page *page = find_get_page(mapping, offset); + + /* 获得对该页内容的访问权。 */ + void *vaddr = kmap_atomic(page); + + /* 对该页的内容做一些处理。 */ + memset(vaddr, 0, PAGE_SIZE); + + /* 解除该页面的映射。 */ + kunmap_atomic(vaddr); + +注意,kunmap_atomic()调用的是kmap_atomic()调用的结果而不是参数。 + +如果你需要映射两个页面,因为你想从一个页面复制到另一个页面,你需要保持kmap_atomic调用严 +格嵌套,如:: + + vaddr1 = kmap_atomic(page1); + vaddr2 = kmap_atomic(page2); + + memcpy(vaddr1, vaddr2, PAGE_SIZE); + + kunmap_atomic(vaddr2); + kunmap_atomic(vaddr1); + + +临时映射的成本 +============== + +创建临时映射的代价可能相当高。体系架构必须操作内核的页表、数据TLB和/或MMU的寄存器。 + +如果CONFIG_HIGHMEM没有被设置,那么内核会尝试用一点计算来创建映射,将页面结构地址转换成 +指向页面内容的指针,而不是去捣鼓映射。在这种情况下,解映射操作可能是一个空操作。 + +如果CONFIG_MMU没有被设置,那么就不可能有临时映射和高内存。在这种情况下,也将使用计算方法。 + + +i386 PAE +======== + +在某些情况下,i386 架构将允许你在 32 位机器上安装多达 64GiB 的内存。但这有一些后果: + +* Linux需要为系统中的每个页面建立一个页帧结构,而且页帧需要驻在永久映射中,这意味着: + +* 你最多可以有896M/sizeof(struct page)页帧;由于页结构体是32字节的,所以最终会有 + 112G的页;然而,内核需要在内存中存储更多的页帧...... 
+ +* PAE使你的页表变大--这使系统变慢,因为更多的数据需要在TLB填充等方面被访问。一个好处 + 是,PAE有更多的PTE位,可以提供像NX和PAT这样的高级功能。 + +一般的建议是,你不要在32位机器上使用超过8GiB的空间--尽管更多的空间可能对你和你的工作 +量有用,但你几乎是靠你自己--不要指望内核开发者真的会很关心事情的进展情况。 diff --git a/Documentation/translations/zh_CN/vm/index.rst b/Documentation/translations/zh_CN/vm/index.rst index 13df8321f2426..2f9834eb9475c 100644 --- a/Documentation/translations/zh_CN/vm/index.rst +++ b/Documentation/translations/zh_CN/vm/index.rst @@ -25,12 +25,12 @@ TODO:待引用文档集被翻译完毕后请及时修改此处) balance damon/index free_page_reporting + highmem TODOLIST: * arch_pgtable_helpers * free_page_reporting * frontswap -* highmem * hmm * hwpoison * hugetlbfs_reserv -- GitLab From 869f496e1aa6d2b9a8653d65aa7040970f76627a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 27 Jan 2022 18:32:58 +0200 Subject: [PATCH 0066/1586] docs: process: submitting-patches: Clarify the Reported-by usage It's unclear from "Submitting Patches" documentation that Reported-by is not supposed to be used against new features. (It's more clear in the section 5.4 "Patch formatting and changelogs" of the "A guide to the Kernel Development Process", where it suggests that change should fix something existing in the kernel.) Clarify the Reported-by usage in the "Submitting Patches". 
Reported-by: Florian Eckert Signed-off-by: Andy Shevchenko Acked-by: Randy Dunlap Link: https://lore.kernel.org/r/20220127163258.48482-1-andriy.shevchenko@linux.intel.com Signed-off-by: Jonathan Corbet --- Documentation/process/submitting-patches.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst index 31ea120ce531c..fb496b2ebfd38 100644 --- a/Documentation/process/submitting-patches.rst +++ b/Documentation/process/submitting-patches.rst @@ -495,7 +495,8 @@ Using Reported-by:, Tested-by:, Reviewed-by:, Suggested-by: and Fixes: The Reported-by tag gives credit to people who find bugs and report them and it hopefully inspires them to help us again in the future. Please note that if the bug was reported in private, then ask for permission first before using the -Reported-by tag. +Reported-by tag. The tag is intended for bugs; please do not use it to credit +feature requests. A Tested-by: tag indicates that the patch has been successfully tested (in some environment) by the person named. This tag informs maintainers that -- GitLab From ecff30575b5ad0eda149aadad247b7f75411fd47 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Thu, 27 Jan 2022 04:51:00 +0000 Subject: [PATCH 0067/1586] LSM: general protection fault in legacy_parse_param The usual LSM hook "bail on fail" scheme doesn't work for cases where a security module may return an error code indicating that it does not recognize an input. In this particular case Smack sees a mount option that it recognizes, and returns 0. A call to a BPF hook follows, which returns -ENOPARAM, which confuses the caller because Smack has processed its data. The SELinux hook incorrectly returns 1 on success. There was a time when this was correct, however the current expectation is that it return 0 on success. This is repaired. 
Reported-by: syzbot+d1e3b1d92d25abf97943@syzkaller.appspotmail.com Signed-off-by: Casey Schaufler Acked-by: James Morris Signed-off-by: Paul Moore --- security/security.c | 17 +++++++++++++++-- security/selinux/hooks.c | 5 ++--- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/security/security.c b/security/security.c index 3d4eb474f35b0..e649c8691be27 100644 --- a/security/security.c +++ b/security/security.c @@ -884,9 +884,22 @@ int security_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc) return call_int_hook(fs_context_dup, 0, fc, src_fc); } -int security_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param) +int security_fs_context_parse_param(struct fs_context *fc, + struct fs_parameter *param) { - return call_int_hook(fs_context_parse_param, -ENOPARAM, fc, param); + struct security_hook_list *hp; + int trc; + int rc = -ENOPARAM; + + hlist_for_each_entry(hp, &security_hook_heads.fs_context_parse_param, + list) { + trc = hp->hook.fs_context_parse_param(fc, param); + if (trc == 0) + rc = 0; + else if (trc != -ENOPARAM) + return trc; + } + return rc; } int security_sb_alloc(struct super_block *sb) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 221e642025f53..9e3658e9e7ca7 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2859,10 +2859,9 @@ static int selinux_fs_context_parse_param(struct fs_context *fc, return opt; rc = selinux_add_opt(opt, param->string, &fc->security); - if (!rc) { + if (!rc) param->string = NULL; - rc = 1; - } + return rc; } -- GitLab From 660ff26080f587052c338094a187e8ecc758d6e9 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Tue, 4 Jan 2022 16:29:16 +0800 Subject: [PATCH 0068/1586] Documentation: use the tabs on all acc documentation Use the tabs on all Hisilicon Accelerator documentation. including hpre, sec, zip. 
Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- Documentation/ABI/testing/debugfs-hisi-hpre | 168 ++++++++++---------- Documentation/ABI/testing/debugfs-hisi-sec | 136 ++++++++-------- Documentation/ABI/testing/debugfs-hisi-zip | 136 ++++++++-------- 3 files changed, 220 insertions(+), 220 deletions(-) diff --git a/Documentation/ABI/testing/debugfs-hisi-hpre b/Documentation/ABI/testing/debugfs-hisi-hpre index b4be5f1db4b77..68d4dcb1538d9 100644 --- a/Documentation/ABI/testing/debugfs-hisi-hpre +++ b/Documentation/ABI/testing/debugfs-hisi-hpre @@ -1,140 +1,140 @@ -What: /sys/kernel/debug/hisi_hpre//cluster[0-3]/regs -Date: Sep 2019 -Contact: linux-crypto@vger.kernel.org -Description: Dump debug registers from the HPRE cluster. +What: /sys/kernel/debug/hisi_hpre//cluster[0-3]/regs +Date: Sep 2019 +Contact: linux-crypto@vger.kernel.org +Description: Dump debug registers from the HPRE cluster. Only available for PF. -What: /sys/kernel/debug/hisi_hpre//cluster[0-3]/cluster_ctrl -Date: Sep 2019 -Contact: linux-crypto@vger.kernel.org -Description: Write the HPRE core selection in the cluster into this file, +What: /sys/kernel/debug/hisi_hpre//cluster[0-3]/cluster_ctrl +Date: Sep 2019 +Contact: linux-crypto@vger.kernel.org +Description: Write the HPRE core selection in the cluster into this file, and then we can read the debug information of the core. Only available for PF. -What: /sys/kernel/debug/hisi_hpre//rdclr_en -Date: Sep 2019 -Contact: linux-crypto@vger.kernel.org -Description: HPRE cores debug registers read clear control. 1 means enable +What: /sys/kernel/debug/hisi_hpre//rdclr_en +Date: Sep 2019 +Contact: linux-crypto@vger.kernel.org +Description: HPRE cores debug registers read clear control. 1 means enable register read clear, otherwise 0. Writing to this file has no functional effect, only enable or disable counters clear after reading of these registers. Only available for PF. 
-What: /sys/kernel/debug/hisi_hpre//current_qm -Date: Sep 2019 -Contact: linux-crypto@vger.kernel.org -Description: One HPRE controller has one PF and multiple VFs, each function +What: /sys/kernel/debug/hisi_hpre//current_qm +Date: Sep 2019 +Contact: linux-crypto@vger.kernel.org +Description: One HPRE controller has one PF and multiple VFs, each function has a QM. Select the QM which below qm refers to. Only available for PF. -What: /sys/kernel/debug/hisi_hpre//regs -Date: Sep 2019 -Contact: linux-crypto@vger.kernel.org -Description: Dump debug registers from the HPRE. +What: /sys/kernel/debug/hisi_hpre//regs +Date: Sep 2019 +Contact: linux-crypto@vger.kernel.org +Description: Dump debug registers from the HPRE. Only available for PF. -What: /sys/kernel/debug/hisi_hpre//qm/regs -Date: Sep 2019 -Contact: linux-crypto@vger.kernel.org -Description: Dump debug registers from the QM. +What: /sys/kernel/debug/hisi_hpre//qm/regs +Date: Sep 2019 +Contact: linux-crypto@vger.kernel.org +Description: Dump debug registers from the QM. Available for PF and VF in host. VF in guest currently only has one debug register. -What: /sys/kernel/debug/hisi_hpre//qm/current_q -Date: Sep 2019 -Contact: linux-crypto@vger.kernel.org -Description: One QM may contain multiple queues. Select specific queue to +What: /sys/kernel/debug/hisi_hpre//qm/current_q +Date: Sep 2019 +Contact: linux-crypto@vger.kernel.org +Description: One QM may contain multiple queues. Select specific queue to show its debug registers in above regs. Only available for PF. -What: /sys/kernel/debug/hisi_hpre//qm/clear_enable -Date: Sep 2019 -Contact: linux-crypto@vger.kernel.org -Description: QM debug registers(regs) read clear control. 1 means enable +What: /sys/kernel/debug/hisi_hpre//qm/clear_enable +Date: Sep 2019 +Contact: linux-crypto@vger.kernel.org +Description: QM debug registers(regs) read clear control. 1 means enable register read clear, otherwise 0. 
Writing to this file has no functional effect, only enable or disable counters clear after reading of these registers. Only available for PF. -What: /sys/kernel/debug/hisi_hpre//qm/err_irq -Date: Apr 2020 -Contact: linux-crypto@vger.kernel.org -Description: Dump the number of invalid interrupts for +What: /sys/kernel/debug/hisi_hpre//qm/err_irq +Date: Apr 2020 +Contact: linux-crypto@vger.kernel.org +Description: Dump the number of invalid interrupts for QM task completion. Available for both PF and VF, and take no other effect on HPRE. -What: /sys/kernel/debug/hisi_hpre//qm/aeq_irq -Date: Apr 2020 -Contact: linux-crypto@vger.kernel.org -Description: Dump the number of QM async event queue interrupts. +What: /sys/kernel/debug/hisi_hpre//qm/aeq_irq +Date: Apr 2020 +Contact: linux-crypto@vger.kernel.org +Description: Dump the number of QM async event queue interrupts. Available for both PF and VF, and take no other effect on HPRE. -What: /sys/kernel/debug/hisi_hpre//qm/abnormal_irq -Date: Apr 2020 -Contact: linux-crypto@vger.kernel.org -Description: Dump the number of interrupts for QM abnormal event. +What: /sys/kernel/debug/hisi_hpre//qm/abnormal_irq +Date: Apr 2020 +Contact: linux-crypto@vger.kernel.org +Description: Dump the number of interrupts for QM abnormal event. Available for both PF and VF, and take no other effect on HPRE. -What: /sys/kernel/debug/hisi_hpre//qm/create_qp_err -Date: Apr 2020 -Contact: linux-crypto@vger.kernel.org -Description: Dump the number of queue allocation errors. +What: /sys/kernel/debug/hisi_hpre//qm/create_qp_err +Date: Apr 2020 +Contact: linux-crypto@vger.kernel.org +Description: Dump the number of queue allocation errors. Available for both PF and VF, and take no other effect on HPRE. -What: /sys/kernel/debug/hisi_hpre//qm/mb_err -Date: Apr 2020 -Contact: linux-crypto@vger.kernel.org -Description: Dump the number of failed QM mailbox commands. 
+What: /sys/kernel/debug/hisi_hpre//qm/mb_err +Date: Apr 2020 +Contact: linux-crypto@vger.kernel.org +Description: Dump the number of failed QM mailbox commands. Available for both PF and VF, and take no other effect on HPRE. -What: /sys/kernel/debug/hisi_hpre//qm/status -Date: Apr 2020 -Contact: linux-crypto@vger.kernel.org -Description: Dump the status of the QM. +What: /sys/kernel/debug/hisi_hpre//qm/status +Date: Apr 2020 +Contact: linux-crypto@vger.kernel.org +Description: Dump the status of the QM. Four states: initiated, started, stopped and closed. Available for both PF and VF, and take no other effect on HPRE. -What: /sys/kernel/debug/hisi_hpre//hpre_dfx/send_cnt -Date: Apr 2020 -Contact: linux-crypto@vger.kernel.org -Description: Dump the total number of sent requests. +What: /sys/kernel/debug/hisi_hpre//hpre_dfx/send_cnt +Date: Apr 2020 +Contact: linux-crypto@vger.kernel.org +Description: Dump the total number of sent requests. Available for both PF and VF, and take no other effect on HPRE. -What: /sys/kernel/debug/hisi_hpre//hpre_dfx/recv_cnt -Date: Apr 2020 -Contact: linux-crypto@vger.kernel.org -Description: Dump the total number of received requests. +What: /sys/kernel/debug/hisi_hpre//hpre_dfx/recv_cnt +Date: Apr 2020 +Contact: linux-crypto@vger.kernel.org +Description: Dump the total number of received requests. Available for both PF and VF, and take no other effect on HPRE. -What: /sys/kernel/debug/hisi_hpre//hpre_dfx/send_busy_cnt -Date: Apr 2020 -Contact: linux-crypto@vger.kernel.org -Description: Dump the total number of requests sent +What: /sys/kernel/debug/hisi_hpre//hpre_dfx/send_busy_cnt +Date: Apr 2020 +Contact: linux-crypto@vger.kernel.org +Description: Dump the total number of requests sent with returning busy. Available for both PF and VF, and take no other effect on HPRE. 
-What: /sys/kernel/debug/hisi_hpre//hpre_dfx/send_fail_cnt -Date: Apr 2020 -Contact: linux-crypto@vger.kernel.org -Description: Dump the total number of completed but error requests. +What: /sys/kernel/debug/hisi_hpre//hpre_dfx/send_fail_cnt +Date: Apr 2020 +Contact: linux-crypto@vger.kernel.org +Description: Dump the total number of completed but error requests. Available for both PF and VF, and take no other effect on HPRE. -What: /sys/kernel/debug/hisi_hpre//hpre_dfx/invalid_req_cnt -Date: Apr 2020 -Contact: linux-crypto@vger.kernel.org -Description: Dump the total number of invalid requests being received. +What: /sys/kernel/debug/hisi_hpre//hpre_dfx/invalid_req_cnt +Date: Apr 2020 +Contact: linux-crypto@vger.kernel.org +Description: Dump the total number of invalid requests being received. Available for both PF and VF, and take no other effect on HPRE. -What: /sys/kernel/debug/hisi_hpre//hpre_dfx/overtime_thrhld -Date: Apr 2020 -Contact: linux-crypto@vger.kernel.org -Description: Set the threshold time for counting the request which is +What: /sys/kernel/debug/hisi_hpre//hpre_dfx/overtime_thrhld +Date: Apr 2020 +Contact: linux-crypto@vger.kernel.org +Description: Set the threshold time for counting the request which is processed longer than the threshold. 0: disable(default), 1: 1 microsecond. Available for both PF and VF, and take no other effect on HPRE. -What: /sys/kernel/debug/hisi_hpre//hpre_dfx/over_thrhld_cnt -Date: Apr 2020 -Contact: linux-crypto@vger.kernel.org -Description: Dump the total number of time out requests. +What: /sys/kernel/debug/hisi_hpre//hpre_dfx/over_thrhld_cnt +Date: Apr 2020 +Contact: linux-crypto@vger.kernel.org +Description: Dump the total number of time out requests. Available for both PF and VF, and take no other effect on HPRE. 
diff --git a/Documentation/ABI/testing/debugfs-hisi-sec b/Documentation/ABI/testing/debugfs-hisi-sec index 85feb4408e0f6..58cfa354534c7 100644 --- a/Documentation/ABI/testing/debugfs-hisi-sec +++ b/Documentation/ABI/testing/debugfs-hisi-sec @@ -1,113 +1,113 @@ -What: /sys/kernel/debug/hisi_sec2//clear_enable -Date: Oct 2019 -Contact: linux-crypto@vger.kernel.org -Description: Enabling/disabling of clear action after reading +What: /sys/kernel/debug/hisi_sec2//clear_enable +Date: Oct 2019 +Contact: linux-crypto@vger.kernel.org +Description: Enabling/disabling of clear action after reading the SEC debug registers. 0: disable, 1: enable. Only available for PF, and take no other effect on SEC. -What: /sys/kernel/debug/hisi_sec2//current_qm -Date: Oct 2019 -Contact: linux-crypto@vger.kernel.org -Description: One SEC controller has one PF and multiple VFs, each function +What: /sys/kernel/debug/hisi_sec2//current_qm +Date: Oct 2019 +Contact: linux-crypto@vger.kernel.org +Description: One SEC controller has one PF and multiple VFs, each function has a QM. This file can be used to select the QM which below qm refers to. Only available for PF. -What: /sys/kernel/debug/hisi_sec2//qm/qm_regs -Date: Oct 2019 -Contact: linux-crypto@vger.kernel.org -Description: Dump of QM related debug registers. +What: /sys/kernel/debug/hisi_sec2//qm/qm_regs +Date: Oct 2019 +Contact: linux-crypto@vger.kernel.org +Description: Dump of QM related debug registers. Available for PF and VF in host. VF in guest currently only has one debug register. -What: /sys/kernel/debug/hisi_sec2//qm/current_q -Date: Oct 2019 -Contact: linux-crypto@vger.kernel.org -Description: One QM of SEC may contain multiple queues. Select specific +What: /sys/kernel/debug/hisi_sec2//qm/current_q +Date: Oct 2019 +Contact: linux-crypto@vger.kernel.org +Description: One QM of SEC may contain multiple queues. Select specific queue to show its debug registers in above 'regs'. Only available for PF. 
-What: /sys/kernel/debug/hisi_sec2//qm/clear_enable -Date: Oct 2019 -Contact: linux-crypto@vger.kernel.org -Description: Enabling/disabling of clear action after reading +What: /sys/kernel/debug/hisi_sec2//qm/clear_enable +Date: Oct 2019 +Contact: linux-crypto@vger.kernel.org +Description: Enabling/disabling of clear action after reading the SEC's QM debug registers. 0: disable, 1: enable. Only available for PF, and take no other effect on SEC. -What: /sys/kernel/debug/hisi_sec2//qm/err_irq -Date: Apr 2020 -Contact: linux-crypto@vger.kernel.org -Description: Dump the number of invalid interrupts for +What: /sys/kernel/debug/hisi_sec2//qm/err_irq +Date: Apr 2020 +Contact: linux-crypto@vger.kernel.org +Description: Dump the number of invalid interrupts for QM task completion. Available for both PF and VF, and take no other effect on SEC. -What: /sys/kernel/debug/hisi_sec2//qm/aeq_irq -Date: Apr 2020 -Contact: linux-crypto@vger.kernel.org -Description: Dump the number of QM async event queue interrupts. +What: /sys/kernel/debug/hisi_sec2//qm/aeq_irq +Date: Apr 2020 +Contact: linux-crypto@vger.kernel.org +Description: Dump the number of QM async event queue interrupts. Available for both PF and VF, and take no other effect on SEC. -What: /sys/kernel/debug/hisi_sec2//qm/abnormal_irq -Date: Apr 2020 -Contact: linux-crypto@vger.kernel.org -Description: Dump the number of interrupts for QM abnormal event. +What: /sys/kernel/debug/hisi_sec2//qm/abnormal_irq +Date: Apr 2020 +Contact: linux-crypto@vger.kernel.org +Description: Dump the number of interrupts for QM abnormal event. Available for both PF and VF, and take no other effect on SEC. -What: /sys/kernel/debug/hisi_sec2//qm/create_qp_err -Date: Apr 2020 -Contact: linux-crypto@vger.kernel.org -Description: Dump the number of queue allocation errors. +What: /sys/kernel/debug/hisi_sec2//qm/create_qp_err +Date: Apr 2020 +Contact: linux-crypto@vger.kernel.org +Description: Dump the number of queue allocation errors. 
Available for both PF and VF, and take no other effect on SEC. -What: /sys/kernel/debug/hisi_sec2//qm/mb_err -Date: Apr 2020 -Contact: linux-crypto@vger.kernel.org -Description: Dump the number of failed QM mailbox commands. +What: /sys/kernel/debug/hisi_sec2//qm/mb_err +Date: Apr 2020 +Contact: linux-crypto@vger.kernel.org +Description: Dump the number of failed QM mailbox commands. Available for both PF and VF, and take no other effect on SEC. -What: /sys/kernel/debug/hisi_sec2//qm/status -Date: Apr 2020 -Contact: linux-crypto@vger.kernel.org -Description: Dump the status of the QM. +What: /sys/kernel/debug/hisi_sec2//qm/status +Date: Apr 2020 +Contact: linux-crypto@vger.kernel.org +Description: Dump the status of the QM. Four states: initiated, started, stopped and closed. Available for both PF and VF, and take no other effect on SEC. -What: /sys/kernel/debug/hisi_sec2//sec_dfx/send_cnt -Date: Apr 2020 -Contact: linux-crypto@vger.kernel.org -Description: Dump the total number of sent requests. +What: /sys/kernel/debug/hisi_sec2//sec_dfx/send_cnt +Date: Apr 2020 +Contact: linux-crypto@vger.kernel.org +Description: Dump the total number of sent requests. Available for both PF and VF, and take no other effect on SEC. -What: /sys/kernel/debug/hisi_sec2//sec_dfx/recv_cnt -Date: Apr 2020 -Contact: linux-crypto@vger.kernel.org -Description: Dump the total number of received requests. +What: /sys/kernel/debug/hisi_sec2//sec_dfx/recv_cnt +Date: Apr 2020 +Contact: linux-crypto@vger.kernel.org +Description: Dump the total number of received requests. Available for both PF and VF, and take no other effect on SEC. -What: /sys/kernel/debug/hisi_sec2//sec_dfx/send_busy_cnt -Date: Apr 2020 -Contact: linux-crypto@vger.kernel.org -Description: Dump the total number of requests sent with returning busy. 
+What: /sys/kernel/debug/hisi_sec2//sec_dfx/send_busy_cnt +Date: Apr 2020 +Contact: linux-crypto@vger.kernel.org +Description: Dump the total number of requests sent with returning busy. Available for both PF and VF, and take no other effect on SEC. -What: /sys/kernel/debug/hisi_sec2//sec_dfx/err_bd_cnt -Date: Apr 2020 -Contact: linux-crypto@vger.kernel.org -Description: Dump the total number of BD type error requests +What: /sys/kernel/debug/hisi_sec2//sec_dfx/err_bd_cnt +Date: Apr 2020 +Contact: linux-crypto@vger.kernel.org +Description: Dump the total number of BD type error requests to be received. Available for both PF and VF, and take no other effect on SEC. -What: /sys/kernel/debug/hisi_sec2//sec_dfx/invalid_req_cnt -Date: Apr 2020 -Contact: linux-crypto@vger.kernel.org -Description: Dump the total number of invalid requests being received. +What: /sys/kernel/debug/hisi_sec2//sec_dfx/invalid_req_cnt +Date: Apr 2020 +Contact: linux-crypto@vger.kernel.org +Description: Dump the total number of invalid requests being received. Available for both PF and VF, and take no other effect on SEC. -What: /sys/kernel/debug/hisi_sec2//sec_dfx/done_flag_cnt -Date: Apr 2020 -Contact: linux-crypto@vger.kernel.org -Description: Dump the total number of completed but marked error requests +What: /sys/kernel/debug/hisi_sec2//sec_dfx/done_flag_cnt +Date: Apr 2020 +Contact: linux-crypto@vger.kernel.org +Description: Dump the total number of completed but marked error requests to be received. Available for both PF and VF, and take no other effect on SEC. diff --git a/Documentation/ABI/testing/debugfs-hisi-zip b/Documentation/ABI/testing/debugfs-hisi-zip index 3034a2bf99ca5..765e29203ef33 100644 --- a/Documentation/ABI/testing/debugfs-hisi-zip +++ b/Documentation/ABI/testing/debugfs-hisi-zip @@ -1,114 +1,114 @@ -What: /sys/kernel/debug/hisi_zip//comp_core[01]/regs -Date: Nov 2018 -Contact: linux-crypto@vger.kernel.org -Description: Dump of compression cores related debug registers. 
+What: /sys/kernel/debug/hisi_zip//comp_core[01]/regs +Date: Nov 2018 +Contact: linux-crypto@vger.kernel.org +Description: Dump of compression cores related debug registers. Only available for PF. -What: /sys/kernel/debug/hisi_zip//decomp_core[0-5]/regs -Date: Nov 2018 -Contact: linux-crypto@vger.kernel.org -Description: Dump of decompression cores related debug registers. +What: /sys/kernel/debug/hisi_zip//decomp_core[0-5]/regs +Date: Nov 2018 +Contact: linux-crypto@vger.kernel.org +Description: Dump of decompression cores related debug registers. Only available for PF. -What: /sys/kernel/debug/hisi_zip//clear_enable -Date: Nov 2018 -Contact: linux-crypto@vger.kernel.org -Description: Compression/decompression core debug registers read clear +What: /sys/kernel/debug/hisi_zip//clear_enable +Date: Nov 2018 +Contact: linux-crypto@vger.kernel.org +Description: Compression/decompression core debug registers read clear control. 1 means enable register read clear, otherwise 0. Writing to this file has no functional effect, only enable or disable counters clear after reading of these registers. Only available for PF. -What: /sys/kernel/debug/hisi_zip//current_qm -Date: Nov 2018 -Contact: linux-crypto@vger.kernel.org -Description: One ZIP controller has one PF and multiple VFs, each function +What: /sys/kernel/debug/hisi_zip//current_qm +Date: Nov 2018 +Contact: linux-crypto@vger.kernel.org +Description: One ZIP controller has one PF and multiple VFs, each function has a QM. Select the QM which below qm refers to. Only available for PF. -What: /sys/kernel/debug/hisi_zip//qm/regs -Date: Nov 2018 -Contact: linux-crypto@vger.kernel.org -Description: Dump of QM related debug registers. +What: /sys/kernel/debug/hisi_zip//qm/regs +Date: Nov 2018 +Contact: linux-crypto@vger.kernel.org +Description: Dump of QM related debug registers. Available for PF and VF in host. VF in guest currently only has one debug register. 
-What: /sys/kernel/debug/hisi_zip//qm/current_q -Date: Nov 2018 -Contact: linux-crypto@vger.kernel.org -Description: One QM may contain multiple queues. Select specific queue to +What: /sys/kernel/debug/hisi_zip//qm/current_q +Date: Nov 2018 +Contact: linux-crypto@vger.kernel.org +Description: One QM may contain multiple queues. Select specific queue to show its debug registers in above regs. Only available for PF. -What: /sys/kernel/debug/hisi_zip//qm/clear_enable -Date: Nov 2018 -Contact: linux-crypto@vger.kernel.org -Description: QM debug registers(regs) read clear control. 1 means enable +What: /sys/kernel/debug/hisi_zip//qm/clear_enable +Date: Nov 2018 +Contact: linux-crypto@vger.kernel.org +Description: QM debug registers(regs) read clear control. 1 means enable register read clear, otherwise 0. Writing to this file has no functional effect, only enable or disable counters clear after reading of these registers. Only available for PF. -What: /sys/kernel/debug/hisi_zip//qm/err_irq -Date: Apr 2020 -Contact: linux-crypto@vger.kernel.org -Description: Dump the number of invalid interrupts for +What: /sys/kernel/debug/hisi_zip//qm/err_irq +Date: Apr 2020 +Contact: linux-crypto@vger.kernel.org +Description: Dump the number of invalid interrupts for QM task completion. Available for both PF and VF, and take no other effect on ZIP. -What: /sys/kernel/debug/hisi_zip//qm/aeq_irq -Date: Apr 2020 -Contact: linux-crypto@vger.kernel.org -Description: Dump the number of QM async event queue interrupts. +What: /sys/kernel/debug/hisi_zip//qm/aeq_irq +Date: Apr 2020 +Contact: linux-crypto@vger.kernel.org +Description: Dump the number of QM async event queue interrupts. Available for both PF and VF, and take no other effect on ZIP. -What: /sys/kernel/debug/hisi_zip//qm/abnormal_irq -Date: Apr 2020 -Contact: linux-crypto@vger.kernel.org -Description: Dump the number of interrupts for QM abnormal event. 
+What: /sys/kernel/debug/hisi_zip//qm/abnormal_irq +Date: Apr 2020 +Contact: linux-crypto@vger.kernel.org +Description: Dump the number of interrupts for QM abnormal event. Available for both PF and VF, and take no other effect on ZIP. -What: /sys/kernel/debug/hisi_zip//qm/create_qp_err -Date: Apr 2020 -Contact: linux-crypto@vger.kernel.org -Description: Dump the number of queue allocation errors. +What: /sys/kernel/debug/hisi_zip//qm/create_qp_err +Date: Apr 2020 +Contact: linux-crypto@vger.kernel.org +Description: Dump the number of queue allocation errors. Available for both PF and VF, and take no other effect on ZIP. -What: /sys/kernel/debug/hisi_zip//qm/mb_err -Date: Apr 2020 -Contact: linux-crypto@vger.kernel.org -Description: Dump the number of failed QM mailbox commands. +What: /sys/kernel/debug/hisi_zip//qm/mb_err +Date: Apr 2020 +Contact: linux-crypto@vger.kernel.org +Description: Dump the number of failed QM mailbox commands. Available for both PF and VF, and take no other effect on ZIP. -What: /sys/kernel/debug/hisi_zip//qm/status -Date: Apr 2020 -Contact: linux-crypto@vger.kernel.org -Description: Dump the status of the QM. +What: /sys/kernel/debug/hisi_zip//qm/status +Date: Apr 2020 +Contact: linux-crypto@vger.kernel.org +Description: Dump the status of the QM. Four states: initiated, started, stopped and closed. Available for both PF and VF, and take no other effect on ZIP. -What: /sys/kernel/debug/hisi_zip//zip_dfx/send_cnt -Date: Apr 2020 -Contact: linux-crypto@vger.kernel.org -Description: Dump the total number of sent requests. +What: /sys/kernel/debug/hisi_zip//zip_dfx/send_cnt +Date: Apr 2020 +Contact: linux-crypto@vger.kernel.org +Description: Dump the total number of sent requests. Available for both PF and VF, and take no other effect on ZIP. -What: /sys/kernel/debug/hisi_zip//zip_dfx/recv_cnt -Date: Apr 2020 -Contact: linux-crypto@vger.kernel.org -Description: Dump the total number of received requests. 
+What: /sys/kernel/debug/hisi_zip//zip_dfx/recv_cnt +Date: Apr 2020 +Contact: linux-crypto@vger.kernel.org +Description: Dump the total number of received requests. Available for both PF and VF, and take no other effect on ZIP. -What: /sys/kernel/debug/hisi_zip//zip_dfx/send_busy_cnt -Date: Apr 2020 -Contact: linux-crypto@vger.kernel.org -Description: Dump the total number of requests received +What: /sys/kernel/debug/hisi_zip//zip_dfx/send_busy_cnt +Date: Apr 2020 +Contact: linux-crypto@vger.kernel.org +Description: Dump the total number of requests received with returning busy. Available for both PF and VF, and take no other effect on ZIP. -What: /sys/kernel/debug/hisi_zip//zip_dfx/err_bd_cnt -Date: Apr 2020 -Contact: linux-crypto@vger.kernel.org -Description: Dump the total number of BD type error requests +What: /sys/kernel/debug/hisi_zip//zip_dfx/err_bd_cnt +Date: Apr 2020 +Contact: linux-crypto@vger.kernel.org +Description: Dump the total number of BD type error requests to be received. Available for both PF and VF, and take no other effect on ZIP. -- GitLab From 535761866ec36e85ef32b4d41e8757307506bd17 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Tue, 4 Jan 2022 16:29:17 +0800 Subject: [PATCH 0069/1586] Documentation: update debugfs doc for Hisilicon SEC Update documentation describing DebugFS for function's QoS limiting. Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- Documentation/ABI/testing/debugfs-hisi-sec | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/ABI/testing/debugfs-hisi-sec b/Documentation/ABI/testing/debugfs-hisi-sec index 58cfa354534c7..2bf84ced484b7 100644 --- a/Documentation/ABI/testing/debugfs-hisi-sec +++ b/Documentation/ABI/testing/debugfs-hisi-sec @@ -14,6 +14,16 @@ Description: One SEC controller has one PF and multiple VFs, each function qm refers to. Only available for PF. 
+What: /sys/kernel/debug/hisi_sec2/<bdf>/alg_qos +Date: Jun 2021 +Contact: linux-crypto@vger.kernel.org +Description: The <bdf> is related the function for PF and VF. + SEC driver supports to configure each function's QoS, the driver + supports to write value to alg_qos in the host. Such as + "echo value > alg_qos". The qos value is 1~1000, means + 1/1000~1000/1000 of total QoS. The driver reading alg_qos to + get related QoS in the host and VM, Such as "cat alg_qos". + What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/qm_regs Date: Oct 2019 Contact: linux-crypto@vger.kernel.org -- GitLab From f6fb93ccc8c835efd15960181c768cd7c0d204a6 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Tue, 4 Jan 2022 16:29:18 +0800 Subject: [PATCH 0070/1586] Documentation: update debugfs doc for Hisilicon ZIP Update documentation describing DebugFS for function's QoS limiting. Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- Documentation/ABI/testing/debugfs-hisi-zip | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/ABI/testing/debugfs-hisi-zip b/Documentation/ABI/testing/debugfs-hisi-zip index 765e29203ef33..bf1258bc64959 100644 --- a/Documentation/ABI/testing/debugfs-hisi-zip +++ b/Documentation/ABI/testing/debugfs-hisi-zip @@ -26,6 +26,16 @@ Description: One ZIP controller has one PF and multiple VFs, each function has a QM. Select the QM which below qm refers to. Only available for PF. +What: /sys/kernel/debug/hisi_zip/<bdf>/alg_qos +Date: Jun 2021 +Contact: linux-crypto@vger.kernel.org +Description: The <bdf> is related the function for PF and VF. + ZIP driver supports to configure each function's QoS, the driver + supports to write value to alg_qos in the host. Such as + "echo value > alg_qos". The qos value is 1~1000, means + 1/1000~1000/1000 of total QoS. The driver reading alg_qos to + get related QoS in the host and VM, Such as "cat alg_qos". 
+ What: /sys/kernel/debug/hisi_zip/<bdf>/qm/regs Date: Nov 2018 Contact: linux-crypto@vger.kernel.org -- GitLab From 68ce6126e11364dd5b3a5c5113b4285c56d8d249 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Tue, 4 Jan 2022 16:29:19 +0800 Subject: [PATCH 0071/1586] Documentation: update debugfs doc for Hisilicon HPRE Update documentation describing DebugFS for function's QoS limiting. Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- Documentation/ABI/testing/debugfs-hisi-hpre | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/ABI/testing/debugfs-hisi-hpre b/Documentation/ABI/testing/debugfs-hisi-hpre index 68d4dcb1538d9..396de7bc735d0 100644 --- a/Documentation/ABI/testing/debugfs-hisi-hpre +++ b/Documentation/ABI/testing/debugfs-hisi-hpre @@ -27,6 +27,16 @@ Description: One HPRE controller has one PF and multiple VFs, each function has a QM. Select the QM which below qm refers to. Only available for PF. +What: /sys/kernel/debug/hisi_hpre/<bdf>/alg_qos +Date: Jun 2021 +Contact: linux-crypto@vger.kernel.org +Description: The <bdf> is related to the function for PF and VF. + The HPRE driver supports configuring each function's QoS by + writing a value to alg_qos in the host, such as + "echo value > alg_qos". The QoS value is 1~1000, meaning + 1/1000~1000/1000 of the total QoS. Read alg_qos to get the + related QoS in the host and VM, such as "cat alg_qos". + What: /sys/kernel/debug/hisi_hpre/<bdf>/regs Date: Sep 2019 Contact: linux-crypto@vger.kernel.org -- GitLab From eb90686d5d10fef9cadd9c0eb30f3fee66d2b2a5 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Fri, 7 Jan 2022 20:06:55 +0800 Subject: [PATCH 0072/1586] crypto: sm3 - create SM3 stand-alone library Stand-alone implementation of the SM3 algorithm. It is designed to have as few dependencies as possible. In other cases you should generally use the hash APIs from include/crypto/hash.h, especially when hashing large amounts of data, as those APIs may be hw-accelerated.
In the new SM3 stand-alone library, sm3_transform() has also been optimized, instead of simply using the code in sm3_generic. Signed-off-by: Tianjia Zhang Reviewed-by: Gilad Ben-Yossef Signed-off-by: Herbert Xu --- include/crypto/sm3.h | 32 ++++++ lib/crypto/Kconfig | 3 + lib/crypto/Makefile | 3 + lib/crypto/sm3.c | 246 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 284 insertions(+) create mode 100644 lib/crypto/sm3.c diff --git a/include/crypto/sm3.h b/include/crypto/sm3.h index 42ea21289ba95..b5fb6d1bf2479 100644 --- a/include/crypto/sm3.h +++ b/include/crypto/sm3.h @@ -1,5 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Common values for SM3 algorithm + * + * Copyright (C) 2017 ARM Limited or its affiliates. + * Copyright (C) 2017 Gilad Ben-Yossef + * Copyright (C) 2021 Tianjia Zhang */ #ifndef _CRYPTO_SM3_H @@ -39,4 +44,31 @@ extern int crypto_sm3_final(struct shash_desc *desc, u8 *out); extern int crypto_sm3_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *hash); + +/* + * Stand-alone implementation of the SM3 algorithm. It is designed to + * have as few dependencies as possible so it can be used in the + * kexec_file purgatory. In other cases you should generally use the + * hash APIs from include/crypto/hash.h, especially when hashing large + * amounts of data, as those APIs may be hw-accelerated.
+ * + * For details see lib/crypto/sm3.c + */ + +static inline void sm3_init(struct sm3_state *sctx) +{ + sctx->state[0] = SM3_IVA; + sctx->state[1] = SM3_IVB; + sctx->state[2] = SM3_IVC; + sctx->state[3] = SM3_IVD; + sctx->state[4] = SM3_IVE; + sctx->state[5] = SM3_IVF; + sctx->state[6] = SM3_IVG; + sctx->state[7] = SM3_IVH; + sctx->count = 0; +} + +void sm3_update(struct sm3_state *sctx, const u8 *data, unsigned int len); +void sm3_final(struct sm3_state *sctx, u8 *out); + #endif diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index e8e525650cf29..379a66d7f504c 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -123,6 +123,9 @@ config CRYPTO_LIB_CHACHA20POLY1305 config CRYPTO_LIB_SHA256 tristate +config CRYPTO_LIB_SM3 + tristate + config CRYPTO_LIB_SM4 tristate diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index ed43a41f2dcc8..6c872d05d1e62 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -37,6 +37,9 @@ libpoly1305-y += poly1305.o obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o libsha256-y := sha256.o +obj-$(CONFIG_CRYPTO_LIB_SM3) += libsm3.o +libsm3-y := sm3.o + obj-$(CONFIG_CRYPTO_LIB_SM4) += libsm4.o libsm4-y := sm4.o diff --git a/lib/crypto/sm3.c b/lib/crypto/sm3.c new file mode 100644 index 0000000000000..d473e358a873a --- /dev/null +++ b/lib/crypto/sm3.c @@ -0,0 +1,246 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * SM3 secure hash, as specified by OSCCA GM/T 0004-2012 SM3 and described + * at https://datatracker.ietf.org/doc/html/draft-sca-cfrg-sm3-02 + * + * Copyright (C) 2017 ARM Limited or its affiliates. 
+ * Copyright (C) 2017 Gilad Ben-Yossef + * Copyright (C) 2021 Tianjia Zhang + */ + +#include <linux/module.h> +#include <asm/unaligned.h> +#include <crypto/sm3.h> + +static const u32 ____cacheline_aligned K[64] = { + 0x79cc4519, 0xf3988a32, 0xe7311465, 0xce6228cb, + 0x9cc45197, 0x3988a32f, 0x7311465e, 0xe6228cbc, + 0xcc451979, 0x988a32f3, 0x311465e7, 0x6228cbce, + 0xc451979c, 0x88a32f39, 0x11465e73, 0x228cbce6, + 0x9d8a7a87, 0x3b14f50f, 0x7629ea1e, 0xec53d43c, + 0xd8a7a879, 0xb14f50f3, 0x629ea1e7, 0xc53d43ce, + 0x8a7a879d, 0x14f50f3b, 0x29ea1e76, 0x53d43cec, + 0xa7a879d8, 0x4f50f3b1, 0x9ea1e762, 0x3d43cec5, + 0x7a879d8a, 0xf50f3b14, 0xea1e7629, 0xd43cec53, + 0xa879d8a7, 0x50f3b14f, 0xa1e7629e, 0x43cec53d, + 0x879d8a7a, 0x0f3b14f5, 0x1e7629ea, 0x3cec53d4, + 0x79d8a7a8, 0xf3b14f50, 0xe7629ea1, 0xcec53d43, + 0x9d8a7a87, 0x3b14f50f, 0x7629ea1e, 0xec53d43c, + 0xd8a7a879, 0xb14f50f3, 0x629ea1e7, 0xc53d43ce, + 0x8a7a879d, 0x14f50f3b, 0x29ea1e76, 0x53d43cec, + 0xa7a879d8, 0x4f50f3b1, 0x9ea1e762, 0x3d43cec5 +}; + +/* + * Transform the message X which consists of 16 32-bit-words. See + * GM/T 0004-2012 for details.
+ */ +#define R(i, a, b, c, d, e, f, g, h, t, w1, w2) \ + do { \ + ss1 = rol32((rol32((a), 12) + (e) + (t)), 7); \ + ss2 = ss1 ^ rol32((a), 12); \ + d += FF ## i(a, b, c) + ss2 + ((w1) ^ (w2)); \ + h += GG ## i(e, f, g) + ss1 + (w1); \ + b = rol32((b), 9); \ + f = rol32((f), 19); \ + h = P0((h)); \ + } while (0) + +#define R1(a, b, c, d, e, f, g, h, t, w1, w2) \ + R(1, a, b, c, d, e, f, g, h, t, w1, w2) +#define R2(a, b, c, d, e, f, g, h, t, w1, w2) \ + R(2, a, b, c, d, e, f, g, h, t, w1, w2) + +#define FF1(x, y, z) (x ^ y ^ z) +#define FF2(x, y, z) ((x & y) | (x & z) | (y & z)) + +#define GG1(x, y, z) FF1(x, y, z) +#define GG2(x, y, z) ((x & y) | (~x & z)) + +/* Message expansion */ +#define P0(x) ((x) ^ rol32((x), 9) ^ rol32((x), 17)) +#define P1(x) ((x) ^ rol32((x), 15) ^ rol32((x), 23)) +#define I(i) (W[i] = get_unaligned_be32(data + i * 4)) +#define W1(i) (W[i & 0x0f]) +#define W2(i) (W[i & 0x0f] = \ + P1(W[i & 0x0f] \ + ^ W[(i-9) & 0x0f] \ + ^ rol32(W[(i-3) & 0x0f], 15)) \ + ^ rol32(W[(i-13) & 0x0f], 7) \ + ^ W[(i-6) & 0x0f]) + +static void sm3_transform(struct sm3_state *sctx, u8 const *data, u32 W[16]) +{ + u32 a, b, c, d, e, f, g, h, ss1, ss2; + + a = sctx->state[0]; + b = sctx->state[1]; + c = sctx->state[2]; + d = sctx->state[3]; + e = sctx->state[4]; + f = sctx->state[5]; + g = sctx->state[6]; + h = sctx->state[7]; + + R1(a, b, c, d, e, f, g, h, K[0], I(0), I(4)); + R1(d, a, b, c, h, e, f, g, K[1], I(1), I(5)); + R1(c, d, a, b, g, h, e, f, K[2], I(2), I(6)); + R1(b, c, d, a, f, g, h, e, K[3], I(3), I(7)); + R1(a, b, c, d, e, f, g, h, K[4], W1(4), I(8)); + R1(d, a, b, c, h, e, f, g, K[5], W1(5), I(9)); + R1(c, d, a, b, g, h, e, f, K[6], W1(6), I(10)); + R1(b, c, d, a, f, g, h, e, K[7], W1(7), I(11)); + R1(a, b, c, d, e, f, g, h, K[8], W1(8), I(12)); + R1(d, a, b, c, h, e, f, g, K[9], W1(9), I(13)); + R1(c, d, a, b, g, h, e, f, K[10], W1(10), I(14)); + R1(b, c, d, a, f, g, h, e, K[11], W1(11), I(15)); + R1(a, b, c, d, e, f, g, h, K[12], W1(12), W2(16)); + 
R1(d, a, b, c, h, e, f, g, K[13], W1(13), W2(17)); + R1(c, d, a, b, g, h, e, f, K[14], W1(14), W2(18)); + R1(b, c, d, a, f, g, h, e, K[15], W1(15), W2(19)); + + R2(a, b, c, d, e, f, g, h, K[16], W1(16), W2(20)); + R2(d, a, b, c, h, e, f, g, K[17], W1(17), W2(21)); + R2(c, d, a, b, g, h, e, f, K[18], W1(18), W2(22)); + R2(b, c, d, a, f, g, h, e, K[19], W1(19), W2(23)); + R2(a, b, c, d, e, f, g, h, K[20], W1(20), W2(24)); + R2(d, a, b, c, h, e, f, g, K[21], W1(21), W2(25)); + R2(c, d, a, b, g, h, e, f, K[22], W1(22), W2(26)); + R2(b, c, d, a, f, g, h, e, K[23], W1(23), W2(27)); + R2(a, b, c, d, e, f, g, h, K[24], W1(24), W2(28)); + R2(d, a, b, c, h, e, f, g, K[25], W1(25), W2(29)); + R2(c, d, a, b, g, h, e, f, K[26], W1(26), W2(30)); + R2(b, c, d, a, f, g, h, e, K[27], W1(27), W2(31)); + R2(a, b, c, d, e, f, g, h, K[28], W1(28), W2(32)); + R2(d, a, b, c, h, e, f, g, K[29], W1(29), W2(33)); + R2(c, d, a, b, g, h, e, f, K[30], W1(30), W2(34)); + R2(b, c, d, a, f, g, h, e, K[31], W1(31), W2(35)); + + R2(a, b, c, d, e, f, g, h, K[32], W1(32), W2(36)); + R2(d, a, b, c, h, e, f, g, K[33], W1(33), W2(37)); + R2(c, d, a, b, g, h, e, f, K[34], W1(34), W2(38)); + R2(b, c, d, a, f, g, h, e, K[35], W1(35), W2(39)); + R2(a, b, c, d, e, f, g, h, K[36], W1(36), W2(40)); + R2(d, a, b, c, h, e, f, g, K[37], W1(37), W2(41)); + R2(c, d, a, b, g, h, e, f, K[38], W1(38), W2(42)); + R2(b, c, d, a, f, g, h, e, K[39], W1(39), W2(43)); + R2(a, b, c, d, e, f, g, h, K[40], W1(40), W2(44)); + R2(d, a, b, c, h, e, f, g, K[41], W1(41), W2(45)); + R2(c, d, a, b, g, h, e, f, K[42], W1(42), W2(46)); + R2(b, c, d, a, f, g, h, e, K[43], W1(43), W2(47)); + R2(a, b, c, d, e, f, g, h, K[44], W1(44), W2(48)); + R2(d, a, b, c, h, e, f, g, K[45], W1(45), W2(49)); + R2(c, d, a, b, g, h, e, f, K[46], W1(46), W2(50)); + R2(b, c, d, a, f, g, h, e, K[47], W1(47), W2(51)); + + R2(a, b, c, d, e, f, g, h, K[48], W1(48), W2(52)); + R2(d, a, b, c, h, e, f, g, K[49], W1(49), W2(53)); + R2(c, d, a, b, g, h, e, f, 
K[50], W1(50), W2(54)); + R2(b, c, d, a, f, g, h, e, K[51], W1(51), W2(55)); + R2(a, b, c, d, e, f, g, h, K[52], W1(52), W2(56)); + R2(d, a, b, c, h, e, f, g, K[53], W1(53), W2(57)); + R2(c, d, a, b, g, h, e, f, K[54], W1(54), W2(58)); + R2(b, c, d, a, f, g, h, e, K[55], W1(55), W2(59)); + R2(a, b, c, d, e, f, g, h, K[56], W1(56), W2(60)); + R2(d, a, b, c, h, e, f, g, K[57], W1(57), W2(61)); + R2(c, d, a, b, g, h, e, f, K[58], W1(58), W2(62)); + R2(b, c, d, a, f, g, h, e, K[59], W1(59), W2(63)); + R2(a, b, c, d, e, f, g, h, K[60], W1(60), W2(64)); + R2(d, a, b, c, h, e, f, g, K[61], W1(61), W2(65)); + R2(c, d, a, b, g, h, e, f, K[62], W1(62), W2(66)); + R2(b, c, d, a, f, g, h, e, K[63], W1(63), W2(67)); + + sctx->state[0] ^= a; + sctx->state[1] ^= b; + sctx->state[2] ^= c; + sctx->state[3] ^= d; + sctx->state[4] ^= e; + sctx->state[5] ^= f; + sctx->state[6] ^= g; + sctx->state[7] ^= h; +} +#undef R +#undef R1 +#undef R2 +#undef I +#undef W1 +#undef W2 + +static inline void sm3_block(struct sm3_state *sctx, + u8 const *data, int blocks, u32 W[16]) +{ + while (blocks--) { + sm3_transform(sctx, data, W); + data += SM3_BLOCK_SIZE; + } +} + +void sm3_update(struct sm3_state *sctx, const u8 *data, unsigned int len) +{ + unsigned int partial = sctx->count % SM3_BLOCK_SIZE; + u32 W[16]; + + sctx->count += len; + + if ((partial + len) >= SM3_BLOCK_SIZE) { + int blocks; + + if (partial) { + int p = SM3_BLOCK_SIZE - partial; + + memcpy(sctx->buffer + partial, data, p); + data += p; + len -= p; + + sm3_block(sctx, sctx->buffer, 1, W); + } + + blocks = len / SM3_BLOCK_SIZE; + len %= SM3_BLOCK_SIZE; + + if (blocks) { + sm3_block(sctx, data, blocks, W); + data += blocks * SM3_BLOCK_SIZE; + } + + memzero_explicit(W, sizeof(W)); + + partial = 0; + } + if (len) + memcpy(sctx->buffer + partial, data, len); +} +EXPORT_SYMBOL_GPL(sm3_update); + +void sm3_final(struct sm3_state *sctx, u8 *out) +{ + const int bit_offset = SM3_BLOCK_SIZE - sizeof(u64); + __be64 *bits = (__be64 
*)(sctx->buffer + bit_offset); + __be32 *digest = (__be32 *)out; + unsigned int partial = sctx->count % SM3_BLOCK_SIZE; + u32 W[16]; + int i; + + sctx->buffer[partial++] = 0x80; + if (partial > bit_offset) { + memset(sctx->buffer + partial, 0, SM3_BLOCK_SIZE - partial); + partial = 0; + + sm3_block(sctx, sctx->buffer, 1, W); + } + + memset(sctx->buffer + partial, 0, bit_offset - partial); + *bits = cpu_to_be64(sctx->count << 3); + sm3_block(sctx, sctx->buffer, 1, W); + + for (i = 0; i < 8; i++) + put_unaligned_be32(sctx->state[i], digest++); + + /* Zeroize sensitive information. */ + memzero_explicit(W, sizeof(W)); + memzero_explicit(sctx, sizeof(*sctx)); +} +EXPORT_SYMBOL_GPL(sm3_final); + +MODULE_DESCRIPTION("Generic SM3 library"); +MODULE_LICENSE("GPL v2"); -- GitLab From f3a03d319dbdbb206530ebfce977c334ee2f8765 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Fri, 7 Jan 2022 20:06:56 +0800 Subject: [PATCH 0073/1586] crypto: arm64/sm3-ce - make dependent on sm3 library SM3 generic library is stand-alone implementation, sm3-ce can depend on the SM3 library instead of sm3-generic. 
Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- arch/arm64/crypto/Kconfig | 2 +- arch/arm64/crypto/sm3-ce-glue.c | 28 ++++++++++++++++++++-------- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index addfa413650bd..2a965aa0188dd 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -45,7 +45,7 @@ config CRYPTO_SM3_ARM64_CE tristate "SM3 digest algorithm (ARMv8.2 Crypto Extensions)" depends on KERNEL_MODE_NEON select CRYPTO_HASH - select CRYPTO_SM3 + select CRYPTO_LIB_SM3 config CRYPTO_SM4_ARM64_CE tristate "SM4 symmetric cipher (ARMv8.2 Crypto Extensions)" diff --git a/arch/arm64/crypto/sm3-ce-glue.c b/arch/arm64/crypto/sm3-ce-glue.c index d71faca322f2a..ee98954ae8ca6 100644 --- a/arch/arm64/crypto/sm3-ce-glue.c +++ b/arch/arm64/crypto/sm3-ce-glue.c @@ -26,8 +26,10 @@ asmlinkage void sm3_ce_transform(struct sm3_state *sst, u8 const *src, static int sm3_ce_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - if (!crypto_simd_usable()) - return crypto_sm3_update(desc, data, len); + if (!crypto_simd_usable()) { + sm3_update(shash_desc_ctx(desc), data, len); + return 0; + } kernel_neon_begin(); sm3_base_do_update(desc, data, len, sm3_ce_transform); @@ -38,8 +40,10 @@ static int sm3_ce_update(struct shash_desc *desc, const u8 *data, static int sm3_ce_final(struct shash_desc *desc, u8 *out) { - if (!crypto_simd_usable()) - return crypto_sm3_finup(desc, NULL, 0, out); + if (!crypto_simd_usable()) { + sm3_final(shash_desc_ctx(desc), out); + return 0; + } kernel_neon_begin(); sm3_base_do_finalize(desc, sm3_ce_transform); @@ -51,14 +55,22 @@ static int sm3_ce_final(struct shash_desc *desc, u8 *out) static int sm3_ce_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - if (!crypto_simd_usable()) - return crypto_sm3_finup(desc, data, len, out); + if (!crypto_simd_usable()) { + struct sm3_state *sctx = shash_desc_ctx(desc); + + if (len) 
+ sm3_update(sctx, data, len); + sm3_final(sctx, out); + return 0; + } kernel_neon_begin(); - sm3_base_do_update(desc, data, len, sm3_ce_transform); + if (len) + sm3_base_do_update(desc, data, len, sm3_ce_transform); + sm3_base_do_finalize(desc, sm3_ce_transform); kernel_neon_end(); - return sm3_ce_final(desc, out); + return sm3_base_finish(desc, out); } static struct shash_alg sm3_alg = { -- GitLab From 114004696bf23499ca834e784d91bd82de195d76 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Fri, 7 Jan 2022 20:06:57 +0800 Subject: [PATCH 0074/1586] crypto: sm2 - make dependent on sm3 library SM3 generic library is stand-alone implementation, it is necessary for the calculation of sm2 z digest to depends on SM3 library instead of sm3-generic. Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- crypto/Kconfig | 2 +- crypto/sm2.c | 38 +++++++++++++++++++------------------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index 442765219c375..833da41860b98 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -267,7 +267,7 @@ config CRYPTO_ECRDSA config CRYPTO_SM2 tristate "SM2 algorithm" - select CRYPTO_SM3 + select CRYPTO_LIB_SM3 select CRYPTO_AKCIPHER select CRYPTO_MANAGER select MPILIB diff --git a/crypto/sm2.c b/crypto/sm2.c index db8a4a265669d..ae3f77a660703 100644 --- a/crypto/sm2.c +++ b/crypto/sm2.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include "sm2signature.asn1.h" @@ -213,7 +213,7 @@ int sm2_get_signature_s(void *context, size_t hdrlen, unsigned char tag, return 0; } -static int sm2_z_digest_update(struct shash_desc *desc, +static int sm2_z_digest_update(struct sm3_state *sctx, MPI m, unsigned int pbytes) { static const unsigned char zero[32]; @@ -226,20 +226,20 @@ static int sm2_z_digest_update(struct shash_desc *desc, if (inlen < pbytes) { /* padding with zero */ - crypto_sm3_update(desc, zero, pbytes - inlen); - crypto_sm3_update(desc, in, inlen); + 
sm3_update(sctx, zero, pbytes - inlen); + sm3_update(sctx, in, inlen); } else if (inlen > pbytes) { /* skip the starting zero */ - crypto_sm3_update(desc, in + inlen - pbytes, pbytes); + sm3_update(sctx, in + inlen - pbytes, pbytes); } else { - crypto_sm3_update(desc, in, inlen); + sm3_update(sctx, in, inlen); } kfree(in); return 0; } -static int sm2_z_digest_update_point(struct shash_desc *desc, +static int sm2_z_digest_update_point(struct sm3_state *sctx, MPI_POINT point, struct mpi_ec_ctx *ec, unsigned int pbytes) { MPI x, y; @@ -249,8 +249,8 @@ static int sm2_z_digest_update_point(struct shash_desc *desc, y = mpi_new(0); if (!mpi_ec_get_affine(x, y, point, ec) && - !sm2_z_digest_update(desc, x, pbytes) && - !sm2_z_digest_update(desc, y, pbytes)) + !sm2_z_digest_update(sctx, x, pbytes) && + !sm2_z_digest_update(sctx, y, pbytes)) ret = 0; mpi_free(x); @@ -265,7 +265,7 @@ int sm2_compute_z_digest(struct crypto_akcipher *tfm, struct mpi_ec_ctx *ec = akcipher_tfm_ctx(tfm); uint16_t bits_len; unsigned char entl[2]; - SHASH_DESC_ON_STACK(desc, NULL); + struct sm3_state sctx; unsigned int pbytes; if (id_len > (USHRT_MAX / 8) || !ec->Q) @@ -278,17 +278,17 @@ int sm2_compute_z_digest(struct crypto_akcipher *tfm, pbytes = MPI_NBYTES(ec->p); /* ZA = H256(ENTLA | IDA | a | b | xG | yG | xA | yA) */ - sm3_base_init(desc); - crypto_sm3_update(desc, entl, 2); - crypto_sm3_update(desc, id, id_len); - - if (sm2_z_digest_update(desc, ec->a, pbytes) || - sm2_z_digest_update(desc, ec->b, pbytes) || - sm2_z_digest_update_point(desc, ec->G, ec, pbytes) || - sm2_z_digest_update_point(desc, ec->Q, ec, pbytes)) + sm3_init(&sctx); + sm3_update(&sctx, entl, 2); + sm3_update(&sctx, id, id_len); + + if (sm2_z_digest_update(&sctx, ec->a, pbytes) || + sm2_z_digest_update(&sctx, ec->b, pbytes) || + sm2_z_digest_update_point(&sctx, ec->G, ec, pbytes) || + sm2_z_digest_update_point(&sctx, ec->Q, ec, pbytes)) return -EINVAL; - crypto_sm3_final(desc, dgst); + sm3_final(&sctx, dgst); return 0; } 
EXPORT_SYMBOL(sm2_compute_z_digest); -- GitLab From b4784a45ea69577f21f89898c71127774a090a2a Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Fri, 7 Jan 2022 20:06:58 +0800 Subject: [PATCH 0075/1586] crypto: sm3 - make dependent on sm3 library SM3 generic library is stand-alone implementation, it is necessary making the sm3-generic implementation to depends on SM3 library. The functions crypto_sm3_*() provided by sm3_generic is no longer exported. Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- crypto/Kconfig | 1 + crypto/sm3_generic.c | 142 +++++-------------------------------------- include/crypto/sm3.h | 10 --- 3 files changed, 16 insertions(+), 137 deletions(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index 833da41860b98..b07174195e984 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -999,6 +999,7 @@ config CRYPTO_SHA3 config CRYPTO_SM3 tristate "SM3 digest algorithm" select CRYPTO_HASH + select CRYPTO_LIB_SM3 help SM3 secure hash function as defined by OSCCA GM/T 0004-2012 SM3). It is part of the Chinese Commercial Cryptography suite. diff --git a/crypto/sm3_generic.c b/crypto/sm3_generic.c index 193c4584bd004..a215c1c37e730 100644 --- a/crypto/sm3_generic.c +++ b/crypto/sm3_generic.c @@ -5,6 +5,7 @@ * * Copyright (C) 2017 ARM Limited or its affiliates. * Written by Gilad Ben-Yossef + * Copyright (C) 2021 Tianjia Zhang */ #include @@ -26,143 +27,29 @@ const u8 sm3_zero_message_hash[SM3_DIGEST_SIZE] = { }; EXPORT_SYMBOL_GPL(sm3_zero_message_hash); -static inline u32 p0(u32 x) -{ - return x ^ rol32(x, 9) ^ rol32(x, 17); -} - -static inline u32 p1(u32 x) -{ - return x ^ rol32(x, 15) ^ rol32(x, 23); -} - -static inline u32 ff(unsigned int n, u32 a, u32 b, u32 c) -{ - return (n < 16) ? (a ^ b ^ c) : ((a & b) | (a & c) | (b & c)); -} - -static inline u32 gg(unsigned int n, u32 e, u32 f, u32 g) -{ - return (n < 16) ? (e ^ f ^ g) : ((e & f) | ((~e) & g)); -} - -static inline u32 t(unsigned int n) -{ - return (n < 16) ? 
SM3_T1 : SM3_T2; -} - -static void sm3_expand(u32 *t, u32 *w, u32 *wt) -{ - int i; - unsigned int tmp; - - /* load the input */ - for (i = 0; i <= 15; i++) - w[i] = get_unaligned_be32((__u32 *)t + i); - - for (i = 16; i <= 67; i++) { - tmp = w[i - 16] ^ w[i - 9] ^ rol32(w[i - 3], 15); - w[i] = p1(tmp) ^ (rol32(w[i - 13], 7)) ^ w[i - 6]; - } - - for (i = 0; i <= 63; i++) - wt[i] = w[i] ^ w[i + 4]; -} - -static void sm3_compress(u32 *w, u32 *wt, u32 *m) -{ - u32 ss1; - u32 ss2; - u32 tt1; - u32 tt2; - u32 a, b, c, d, e, f, g, h; - int i; - - a = m[0]; - b = m[1]; - c = m[2]; - d = m[3]; - e = m[4]; - f = m[5]; - g = m[6]; - h = m[7]; - - for (i = 0; i <= 63; i++) { - - ss1 = rol32((rol32(a, 12) + e + rol32(t(i), i & 31)), 7); - - ss2 = ss1 ^ rol32(a, 12); - - tt1 = ff(i, a, b, c) + d + ss2 + *wt; - wt++; - - tt2 = gg(i, e, f, g) + h + ss1 + *w; - w++; - - d = c; - c = rol32(b, 9); - b = a; - a = tt1; - h = g; - g = rol32(f, 19); - f = e; - e = p0(tt2); - } - - m[0] = a ^ m[0]; - m[1] = b ^ m[1]; - m[2] = c ^ m[2]; - m[3] = d ^ m[3]; - m[4] = e ^ m[4]; - m[5] = f ^ m[5]; - m[6] = g ^ m[6]; - m[7] = h ^ m[7]; - - a = b = c = d = e = f = g = h = ss1 = ss2 = tt1 = tt2 = 0; -} - -static void sm3_transform(struct sm3_state *sst, u8 const *src) -{ - unsigned int w[68]; - unsigned int wt[64]; - - sm3_expand((u32 *)src, w, wt); - sm3_compress(w, wt, sst->state); - - memzero_explicit(w, sizeof(w)); - memzero_explicit(wt, sizeof(wt)); -} - -static void sm3_generic_block_fn(struct sm3_state *sst, u8 const *src, - int blocks) -{ - while (blocks--) { - sm3_transform(sst, src); - src += SM3_BLOCK_SIZE; - } -} - -int crypto_sm3_update(struct shash_desc *desc, const u8 *data, +static int crypto_sm3_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - return sm3_base_do_update(desc, data, len, sm3_generic_block_fn); + sm3_update(shash_desc_ctx(desc), data, len); + return 0; } -EXPORT_SYMBOL(crypto_sm3_update); -int crypto_sm3_final(struct shash_desc *desc, u8 *out) 
+static int crypto_sm3_final(struct shash_desc *desc, u8 *out) { - sm3_base_do_finalize(desc, sm3_generic_block_fn); - return sm3_base_finish(desc, out); + sm3_final(shash_desc_ctx(desc), out); + return 0; } -EXPORT_SYMBOL(crypto_sm3_final); -int crypto_sm3_finup(struct shash_desc *desc, const u8 *data, +static int crypto_sm3_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *hash) { - sm3_base_do_update(desc, data, len, sm3_generic_block_fn); - return crypto_sm3_final(desc, hash); + struct sm3_state *sctx = shash_desc_ctx(desc); + + if (len) + sm3_update(sctx, data, len); + sm3_final(sctx, hash); + return 0; } -EXPORT_SYMBOL(crypto_sm3_finup); static struct shash_alg sm3_alg = { .digestsize = SM3_DIGEST_SIZE, @@ -174,6 +61,7 @@ static struct shash_alg sm3_alg = { .base = { .cra_name = "sm3", .cra_driver_name = "sm3-generic", + .cra_priority = 100, .cra_blocksize = SM3_BLOCK_SIZE, .cra_module = THIS_MODULE, } diff --git a/include/crypto/sm3.h b/include/crypto/sm3.h index b5fb6d1bf2479..1f021ad0533ff 100644 --- a/include/crypto/sm3.h +++ b/include/crypto/sm3.h @@ -35,16 +35,6 @@ struct sm3_state { u8 buffer[SM3_BLOCK_SIZE]; }; -struct shash_desc; - -extern int crypto_sm3_update(struct shash_desc *desc, const u8 *data, - unsigned int len); - -extern int crypto_sm3_final(struct shash_desc *desc, u8 *out); - -extern int crypto_sm3_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *hash); - /* * Stand-alone implementation of the SM3 algorithm. It is designed to * have as little dependencies as possible so it can be used in the -- GitLab From 930ab34d906d9c44727c9dcfeafcfcd33e3639e7 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Fri, 7 Jan 2022 20:06:59 +0800 Subject: [PATCH 0076/1586] crypto: x86/sm3 - add AVX assembly implementation This patch adds AVX assembly accelerated implementation of SM3 secure hash algorithm. 
From the benchmark data, compared to pure software implementation sm3-generic, the performance increase is up to 38%. The main algorithm implementation based on SM3 AES/BMI2 accelerated work by libgcrypt at: https://gnupg.org/software/libgcrypt/index.html Benchmark on Intel i5-6200U 2.30GHz, performance data of two implementations, pure software sm3-generic and sm3-avx acceleration. The data comes from the 326 mode and 422 mode of tcrypt. The abscissas are different lengths of per update. The data is tabulated and the unit is Mb/s: update-size | 16 64 256 1024 2048 4096 8192 ------------+------------------------------------------------------- sm3-generic | 105.97 129.60 182.12 189.62 188.06 193.66 194.88 sm3-avx | 119.87 163.05 244.44 260.92 257.60 264.87 265.88 Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 3 + arch/x86/crypto/sm3-avx-asm_64.S | 517 +++++++++++++++++++++++++++++++ arch/x86/crypto/sm3_avx_glue.c | 134 ++++++++ crypto/Kconfig | 13 + 4 files changed, 667 insertions(+) create mode 100644 arch/x86/crypto/sm3-avx-asm_64.S create mode 100644 arch/x86/crypto/sm3_avx_glue.c diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index c3af959648e62..2831685adf6fb 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -90,6 +90,9 @@ nhpoly1305-avx2-y := nh-avx2-x86_64.o nhpoly1305-avx2-glue.o obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o +obj-$(CONFIG_CRYPTO_SM3_AVX_X86_64) += sm3-avx-x86_64.o +sm3-avx-x86_64-y := sm3-avx-asm_64.o sm3_avx_glue.o + obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX_X86_64) += sm4-aesni-avx-x86_64.o sm4-aesni-avx-x86_64-y := sm4-aesni-avx-asm_64.o sm4_aesni_avx_glue.o diff --git a/arch/x86/crypto/sm3-avx-asm_64.S b/arch/x86/crypto/sm3-avx-asm_64.S new file mode 100644 index 0000000000000..71e6aae23e17c --- /dev/null +++ b/arch/x86/crypto/sm3-avx-asm_64.S @@ -0,0 +1,517 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM3 AVX accelerated transform. 
+ * specified in: https://datatracker.ietf.org/doc/html/draft-sca-cfrg-sm3-02 + * + * Copyright (C) 2021 Jussi Kivilinna + * Copyright (C) 2021 Tianjia Zhang + */ + +/* Based on SM3 AES/BMI2 accelerated work by libgcrypt at: + * https://gnupg.org/software/libgcrypt/index.html + */ + +#include +#include + +/* Context structure */ + +#define state_h0 0 +#define state_h1 4 +#define state_h2 8 +#define state_h3 12 +#define state_h4 16 +#define state_h5 20 +#define state_h6 24 +#define state_h7 28 + +/* Constants */ + +/* Round constant macros */ + +#define K0 2043430169 /* 0x79cc4519 */ +#define K1 -208106958 /* 0xf3988a32 */ +#define K2 -416213915 /* 0xe7311465 */ +#define K3 -832427829 /* 0xce6228cb */ +#define K4 -1664855657 /* 0x9cc45197 */ +#define K5 965255983 /* 0x3988a32f */ +#define K6 1930511966 /* 0x7311465e */ +#define K7 -433943364 /* 0xe6228cbc */ +#define K8 -867886727 /* 0xcc451979 */ +#define K9 -1735773453 /* 0x988a32f3 */ +#define K10 823420391 /* 0x311465e7 */ +#define K11 1646840782 /* 0x6228cbce */ +#define K12 -1001285732 /* 0xc451979c */ +#define K13 -2002571463 /* 0x88a32f39 */ +#define K14 289824371 /* 0x11465e73 */ +#define K15 579648742 /* 0x228cbce6 */ +#define K16 -1651869049 /* 0x9d8a7a87 */ +#define K17 991229199 /* 0x3b14f50f */ +#define K18 1982458398 /* 0x7629ea1e */ +#define K19 -330050500 /* 0xec53d43c */ +#define K20 -660100999 /* 0xd8a7a879 */ +#define K21 -1320201997 /* 0xb14f50f3 */ +#define K22 1654563303 /* 0x629ea1e7 */ +#define K23 -985840690 /* 0xc53d43ce */ +#define K24 -1971681379 /* 0x8a7a879d */ +#define K25 351604539 /* 0x14f50f3b */ +#define K26 703209078 /* 0x29ea1e76 */ +#define K27 1406418156 /* 0x53d43cec */ +#define K28 -1482130984 /* 0xa7a879d8 */ +#define K29 1330705329 /* 0x4f50f3b1 */ +#define K30 -1633556638 /* 0x9ea1e762 */ +#define K31 1027854021 /* 0x3d43cec5 */ +#define K32 2055708042 /* 0x7a879d8a */ +#define K33 -183551212 /* 0xf50f3b14 */ +#define K34 -367102423 /* 0xea1e7629 */ +#define K35 
-734204845 /* 0xd43cec53 */ +#define K36 -1468409689 /* 0xa879d8a7 */ +#define K37 1358147919 /* 0x50f3b14f */ +#define K38 -1578671458 /* 0xa1e7629e */ +#define K39 1137624381 /* 0x43cec53d */ +#define K40 -2019718534 /* 0x879d8a7a */ +#define K41 255530229 /* 0x0f3b14f5 */ +#define K42 511060458 /* 0x1e7629ea */ +#define K43 1022120916 /* 0x3cec53d4 */ +#define K44 2044241832 /* 0x79d8a7a8 */ +#define K45 -206483632 /* 0xf3b14f50 */ +#define K46 -412967263 /* 0xe7629ea1 */ +#define K47 -825934525 /* 0xcec53d43 */ +#define K48 -1651869049 /* 0x9d8a7a87 */ +#define K49 991229199 /* 0x3b14f50f */ +#define K50 1982458398 /* 0x7629ea1e */ +#define K51 -330050500 /* 0xec53d43c */ +#define K52 -660100999 /* 0xd8a7a879 */ +#define K53 -1320201997 /* 0xb14f50f3 */ +#define K54 1654563303 /* 0x629ea1e7 */ +#define K55 -985840690 /* 0xc53d43ce */ +#define K56 -1971681379 /* 0x8a7a879d */ +#define K57 351604539 /* 0x14f50f3b */ +#define K58 703209078 /* 0x29ea1e76 */ +#define K59 1406418156 /* 0x53d43cec */ +#define K60 -1482130984 /* 0xa7a879d8 */ +#define K61 1330705329 /* 0x4f50f3b1 */ +#define K62 -1633556638 /* 0x9ea1e762 */ +#define K63 1027854021 /* 0x3d43cec5 */ + +/* Register macros */ + +#define RSTATE %rdi +#define RDATA %rsi +#define RNBLKS %rdx + +#define t0 %eax +#define t1 %ebx +#define t2 %ecx + +#define a %r8d +#define b %r9d +#define c %r10d +#define d %r11d +#define e %r12d +#define f %r13d +#define g %r14d +#define h %r15d + +#define W0 %xmm0 +#define W1 %xmm1 +#define W2 %xmm2 +#define W3 %xmm3 +#define W4 %xmm4 +#define W5 %xmm5 + +#define XTMP0 %xmm6 +#define XTMP1 %xmm7 +#define XTMP2 %xmm8 +#define XTMP3 %xmm9 +#define XTMP4 %xmm10 +#define XTMP5 %xmm11 +#define XTMP6 %xmm12 + +#define BSWAP_REG %xmm15 + +/* Stack structure */ + +#define STACK_W_SIZE (32 * 2 * 3) +#define STACK_REG_SAVE_SIZE (64) + +#define STACK_W (0) +#define STACK_REG_SAVE (STACK_W + STACK_W_SIZE) +#define STACK_SIZE (STACK_REG_SAVE + STACK_REG_SAVE_SIZE) + +/* Instruction 
helpers. */ + +#define roll2(v, reg) \ + roll $(v), reg; + +#define roll3mov(v, src, dst) \ + movl src, dst; \ + roll $(v), dst; + +#define roll3(v, src, dst) \ + rorxl $(32-(v)), src, dst; + +#define addl2(a, out) \ + leal (a, out), out; + +/* Round function macros. */ + +#define GG1(x, y, z, o, t) \ + movl x, o; \ + xorl y, o; \ + xorl z, o; + +#define FF1(x, y, z, o, t) GG1(x, y, z, o, t) + +#define GG2(x, y, z, o, t) \ + andnl z, x, o; \ + movl y, t; \ + andl x, t; \ + addl2(t, o); + +#define FF2(x, y, z, o, t) \ + movl y, o; \ + xorl x, o; \ + movl y, t; \ + andl x, t; \ + andl z, o; \ + xorl t, o; + +#define R(i, a, b, c, d, e, f, g, h, round, widx, wtype) \ + /* rol(a, 12) => t0 */ \ + roll3mov(12, a, t0); /* rorxl here would reduce perf by 6% on zen3 */ \ + /* rol (t0 + e + t), 7) => t1 */ \ + leal K##round(t0, e, 1), t1; \ + roll2(7, t1); \ + /* h + w1 => h */ \ + addl wtype##_W1_ADDR(round, widx), h; \ + /* h + t1 => h */ \ + addl2(t1, h); \ + /* t1 ^ t0 => t0 */ \ + xorl t1, t0; \ + /* w1w2 + d => d */ \ + addl wtype##_W1W2_ADDR(round, widx), d; \ + /* FF##i(a,b,c) => t1 */ \ + FF##i(a, b, c, t1, t2); \ + /* d + t1 => d */ \ + addl2(t1, d); \ + /* GG#i(e,f,g) => t2 */ \ + GG##i(e, f, g, t2, t1); \ + /* h + t2 => h */ \ + addl2(t2, h); \ + /* rol (f, 19) => f */ \ + roll2(19, f); \ + /* d + t0 => d */ \ + addl2(t0, d); \ + /* rol (b, 9) => b */ \ + roll2(9, b); \ + /* P0(h) => h */ \ + roll3(9, h, t2); \ + roll3(17, h, t1); \ + xorl t2, h; \ + xorl t1, h; + +#define R1(a, b, c, d, e, f, g, h, round, widx, wtype) \ + R(1, a, b, c, d, e, f, g, h, round, widx, wtype) + +#define R2(a, b, c, d, e, f, g, h, round, widx, wtype) \ + R(2, a, b, c, d, e, f, g, h, round, widx, wtype) + +/* Input expansion macros. */ + +/* Byte-swapped input address. */ +#define IW_W_ADDR(round, widx, offs) \ + (STACK_W + ((round) / 4) * 64 + (offs) + ((widx) * 4))(%rsp) + +/* Expanded input address. 
*/ +#define XW_W_ADDR(round, widx, offs) \ + (STACK_W + ((((round) / 3) - 4) % 2) * 64 + (offs) + ((widx) * 4))(%rsp) + +/* Rounds 0-11, byte-swapped input block addresses. */ +#define IW_W1_ADDR(round, widx) IW_W_ADDR(round, widx, 0) +#define IW_W1W2_ADDR(round, widx) IW_W_ADDR(round, widx, 32) + +/* Rounds 12-63, expanded input block addresses. */ +#define XW_W1_ADDR(round, widx) XW_W_ADDR(round, widx, 0) +#define XW_W1W2_ADDR(round, widx) XW_W_ADDR(round, widx, 32) + +/* Input block loading. */ +#define LOAD_W_XMM_1() \ + vmovdqu 0*16(RDATA), XTMP0; /* XTMP0: w3, w2, w1, w0 */ \ + vmovdqu 1*16(RDATA), XTMP1; /* XTMP1: w7, w6, w5, w4 */ \ + vmovdqu 2*16(RDATA), XTMP2; /* XTMP2: w11, w10, w9, w8 */ \ + vmovdqu 3*16(RDATA), XTMP3; /* XTMP3: w15, w14, w13, w12 */ \ + vpshufb BSWAP_REG, XTMP0, XTMP0; \ + vpshufb BSWAP_REG, XTMP1, XTMP1; \ + vpshufb BSWAP_REG, XTMP2, XTMP2; \ + vpshufb BSWAP_REG, XTMP3, XTMP3; \ + vpxor XTMP0, XTMP1, XTMP4; \ + vpxor XTMP1, XTMP2, XTMP5; \ + vpxor XTMP2, XTMP3, XTMP6; \ + leaq 64(RDATA), RDATA; \ + vmovdqa XTMP0, IW_W1_ADDR(0, 0); \ + vmovdqa XTMP4, IW_W1W2_ADDR(0, 0); \ + vmovdqa XTMP1, IW_W1_ADDR(4, 0); \ + vmovdqa XTMP5, IW_W1W2_ADDR(4, 0); + +#define LOAD_W_XMM_2() \ + vmovdqa XTMP2, IW_W1_ADDR(8, 0); \ + vmovdqa XTMP6, IW_W1W2_ADDR(8, 0); + +#define LOAD_W_XMM_3() \ + vpshufd $0b00000000, XTMP0, W0; /* W0: xx, w0, xx, xx */ \ + vpshufd $0b11111001, XTMP0, W1; /* W1: xx, w3, w2, w1 */ \ + vmovdqa XTMP1, W2; /* W2: xx, w6, w5, w4 */ \ + vpalignr $12, XTMP1, XTMP2, W3; /* W3: xx, w9, w8, w7 */ \ + vpalignr $8, XTMP2, XTMP3, W4; /* W4: xx, w12, w11, w10 */ \ + vpshufd $0b11111001, XTMP3, W5; /* W5: xx, w15, w14, w13 */ + +/* Message scheduling. Note: 3 words per XMM register.
*/ +#define SCHED_W_0(round, w0, w1, w2, w3, w4, w5) \ + /* Load (w[i - 16]) => XTMP0 */ \ + vpshufd $0b10111111, w0, XTMP0; \ + vpalignr $12, XTMP0, w1, XTMP0; /* XTMP0: xx, w2, w1, w0 */ \ + /* Load (w[i - 13]) => XTMP1 */ \ + vpshufd $0b10111111, w1, XTMP1; \ + vpalignr $12, XTMP1, w2, XTMP1; \ + /* w[i - 9] == w3 */ \ + /* XMM3 ^ XTMP0 => XTMP0 */ \ + vpxor w3, XTMP0, XTMP0; + +#define SCHED_W_1(round, w0, w1, w2, w3, w4, w5) \ + /* w[i - 3] == w5 */ \ + /* rol(XMM5, 15) ^ XTMP0 => XTMP0 */ \ + vpslld $15, w5, XTMP2; \ + vpsrld $(32-15), w5, XTMP3; \ + vpxor XTMP2, XTMP3, XTMP3; \ + vpxor XTMP3, XTMP0, XTMP0; \ + /* rol(XTMP1, 7) => XTMP1 */ \ + vpslld $7, XTMP1, XTMP5; \ + vpsrld $(32-7), XTMP1, XTMP1; \ + vpxor XTMP5, XTMP1, XTMP1; \ + /* XMM4 ^ XTMP1 => XTMP1 */ \ + vpxor w4, XTMP1, XTMP1; \ + /* w[i - 6] == XMM4 */ \ + /* P1(XTMP0) ^ XTMP1 => XMM0 */ \ + vpslld $15, XTMP0, XTMP5; \ + vpsrld $(32-15), XTMP0, XTMP6; \ + vpslld $23, XTMP0, XTMP2; \ + vpsrld $(32-23), XTMP0, XTMP3; \ + vpxor XTMP0, XTMP1, XTMP1; \ + vpxor XTMP6, XTMP5, XTMP5; \ + vpxor XTMP3, XTMP2, XTMP2; \ + vpxor XTMP2, XTMP5, XTMP5; \ + vpxor XTMP5, XTMP1, w0; + +#define SCHED_W_2(round, w0, w1, w2, w3, w4, w5) \ + /* W1 in XMM12 */ \ + vpshufd $0b10111111, w4, XTMP4; \ + vpalignr $12, XTMP4, w5, XTMP4; \ + vmovdqa XTMP4, XW_W1_ADDR((round), 0); \ + /* W1 ^ W2 => XTMP1 */ \ + vpxor w0, XTMP4, XTMP1; \ + vmovdqa XTMP1, XW_W1W2_ADDR((round), 0); + + +.section .rodata.cst16, "aM", @progbits, 16 +.align 16 + +.Lbe32mask: + .long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f + +.text + +/* + * Transform nblocks*64 bytes (nblocks*16 32-bit words) at DATA. 
+ * + * void sm3_transform_avx(struct sm3_state *state, + * const u8 *data, int nblocks); + */ +.align 16 +SYM_FUNC_START(sm3_transform_avx) + /* input: + * %rdi: ctx, CTX + * %rsi: data (64*nblks bytes) + * %rdx: nblocks + */ + vzeroupper; + + pushq %rbp; + movq %rsp, %rbp; + + movq %rdx, RNBLKS; + + subq $STACK_SIZE, %rsp; + andq $(~63), %rsp; + + movq %rbx, (STACK_REG_SAVE + 0 * 8)(%rsp); + movq %r15, (STACK_REG_SAVE + 1 * 8)(%rsp); + movq %r14, (STACK_REG_SAVE + 2 * 8)(%rsp); + movq %r13, (STACK_REG_SAVE + 3 * 8)(%rsp); + movq %r12, (STACK_REG_SAVE + 4 * 8)(%rsp); + + vmovdqa .Lbe32mask (%rip), BSWAP_REG; + + /* Get the values of the chaining variables. */ + movl state_h0(RSTATE), a; + movl state_h1(RSTATE), b; + movl state_h2(RSTATE), c; + movl state_h3(RSTATE), d; + movl state_h4(RSTATE), e; + movl state_h5(RSTATE), f; + movl state_h6(RSTATE), g; + movl state_h7(RSTATE), h; + +.align 16 +.Loop: + /* Load data part1. */ + LOAD_W_XMM_1(); + + leaq -1(RNBLKS), RNBLKS; + + /* Transform 0-3 + Load data part2. */ + R1(a, b, c, d, e, f, g, h, 0, 0, IW); LOAD_W_XMM_2(); + R1(d, a, b, c, h, e, f, g, 1, 1, IW); + R1(c, d, a, b, g, h, e, f, 2, 2, IW); + R1(b, c, d, a, f, g, h, e, 3, 3, IW); LOAD_W_XMM_3(); + + /* Transform 4-7 + Precalc 12-14. */ + R1(a, b, c, d, e, f, g, h, 4, 0, IW); + R1(d, a, b, c, h, e, f, g, 5, 1, IW); + R1(c, d, a, b, g, h, e, f, 6, 2, IW); SCHED_W_0(12, W0, W1, W2, W3, W4, W5); + R1(b, c, d, a, f, g, h, e, 7, 3, IW); SCHED_W_1(12, W0, W1, W2, W3, W4, W5); + + /* Transform 8-11 + Precalc 12-17. 
*/ + R1(a, b, c, d, e, f, g, h, 8, 0, IW); SCHED_W_2(12, W0, W1, W2, W3, W4, W5); + R1(d, a, b, c, h, e, f, g, 9, 1, IW); SCHED_W_0(15, W1, W2, W3, W4, W5, W0); + R1(c, d, a, b, g, h, e, f, 10, 2, IW); SCHED_W_1(15, W1, W2, W3, W4, W5, W0); + R1(b, c, d, a, f, g, h, e, 11, 3, IW); SCHED_W_2(15, W1, W2, W3, W4, W5, W0); + + /* Transform 12-14 + Precalc 18-20 */ + R1(a, b, c, d, e, f, g, h, 12, 0, XW); SCHED_W_0(18, W2, W3, W4, W5, W0, W1); + R1(d, a, b, c, h, e, f, g, 13, 1, XW); SCHED_W_1(18, W2, W3, W4, W5, W0, W1); + R1(c, d, a, b, g, h, e, f, 14, 2, XW); SCHED_W_2(18, W2, W3, W4, W5, W0, W1); + + /* Transform 15-17 + Precalc 21-23 */ + R1(b, c, d, a, f, g, h, e, 15, 0, XW); SCHED_W_0(21, W3, W4, W5, W0, W1, W2); + R2(a, b, c, d, e, f, g, h, 16, 1, XW); SCHED_W_1(21, W3, W4, W5, W0, W1, W2); + R2(d, a, b, c, h, e, f, g, 17, 2, XW); SCHED_W_2(21, W3, W4, W5, W0, W1, W2); + + /* Transform 18-20 + Precalc 24-26 */ + R2(c, d, a, b, g, h, e, f, 18, 0, XW); SCHED_W_0(24, W4, W5, W0, W1, W2, W3); + R2(b, c, d, a, f, g, h, e, 19, 1, XW); SCHED_W_1(24, W4, W5, W0, W1, W2, W3); + R2(a, b, c, d, e, f, g, h, 20, 2, XW); SCHED_W_2(24, W4, W5, W0, W1, W2, W3); + + /* Transform 21-23 + Precalc 27-29 */ + R2(d, a, b, c, h, e, f, g, 21, 0, XW); SCHED_W_0(27, W5, W0, W1, W2, W3, W4); + R2(c, d, a, b, g, h, e, f, 22, 1, XW); SCHED_W_1(27, W5, W0, W1, W2, W3, W4); + R2(b, c, d, a, f, g, h, e, 23, 2, XW); SCHED_W_2(27, W5, W0, W1, W2, W3, W4); + + /* Transform 24-26 + Precalc 30-32 */ + R2(a, b, c, d, e, f, g, h, 24, 0, XW); SCHED_W_0(30, W0, W1, W2, W3, W4, W5); + R2(d, a, b, c, h, e, f, g, 25, 1, XW); SCHED_W_1(30, W0, W1, W2, W3, W4, W5); + R2(c, d, a, b, g, h, e, f, 26, 2, XW); SCHED_W_2(30, W0, W1, W2, W3, W4, W5); + + /* Transform 27-29 + Precalc 33-35 */ + R2(b, c, d, a, f, g, h, e, 27, 0, XW); SCHED_W_0(33, W1, W2, W3, W4, W5, W0); + R2(a, b, c, d, e, f, g, h, 28, 1, XW); SCHED_W_1(33, W1, W2, W3, W4, W5, W0); + R2(d, a, b, c, h, e, f, g, 29, 2, XW); SCHED_W_2(33, W1, W2, W3, 
W4, W5, W0); + + /* Transform 30-32 + Precalc 36-38 */ + R2(c, d, a, b, g, h, e, f, 30, 0, XW); SCHED_W_0(36, W2, W3, W4, W5, W0, W1); + R2(b, c, d, a, f, g, h, e, 31, 1, XW); SCHED_W_1(36, W2, W3, W4, W5, W0, W1); + R2(a, b, c, d, e, f, g, h, 32, 2, XW); SCHED_W_2(36, W2, W3, W4, W5, W0, W1); + + /* Transform 33-35 + Precalc 39-41 */ + R2(d, a, b, c, h, e, f, g, 33, 0, XW); SCHED_W_0(39, W3, W4, W5, W0, W1, W2); + R2(c, d, a, b, g, h, e, f, 34, 1, XW); SCHED_W_1(39, W3, W4, W5, W0, W1, W2); + R2(b, c, d, a, f, g, h, e, 35, 2, XW); SCHED_W_2(39, W3, W4, W5, W0, W1, W2); + + /* Transform 36-38 + Precalc 42-44 */ + R2(a, b, c, d, e, f, g, h, 36, 0, XW); SCHED_W_0(42, W4, W5, W0, W1, W2, W3); + R2(d, a, b, c, h, e, f, g, 37, 1, XW); SCHED_W_1(42, W4, W5, W0, W1, W2, W3); + R2(c, d, a, b, g, h, e, f, 38, 2, XW); SCHED_W_2(42, W4, W5, W0, W1, W2, W3); + + /* Transform 39-41 + Precalc 45-47 */ + R2(b, c, d, a, f, g, h, e, 39, 0, XW); SCHED_W_0(45, W5, W0, W1, W2, W3, W4); + R2(a, b, c, d, e, f, g, h, 40, 1, XW); SCHED_W_1(45, W5, W0, W1, W2, W3, W4); + R2(d, a, b, c, h, e, f, g, 41, 2, XW); SCHED_W_2(45, W5, W0, W1, W2, W3, W4); + + /* Transform 42-44 + Precalc 48-50 */ + R2(c, d, a, b, g, h, e, f, 42, 0, XW); SCHED_W_0(48, W0, W1, W2, W3, W4, W5); + R2(b, c, d, a, f, g, h, e, 43, 1, XW); SCHED_W_1(48, W0, W1, W2, W3, W4, W5); + R2(a, b, c, d, e, f, g, h, 44, 2, XW); SCHED_W_2(48, W0, W1, W2, W3, W4, W5); + + /* Transform 45-47 + Precalc 51-53 */ + R2(d, a, b, c, h, e, f, g, 45, 0, XW); SCHED_W_0(51, W1, W2, W3, W4, W5, W0); + R2(c, d, a, b, g, h, e, f, 46, 1, XW); SCHED_W_1(51, W1, W2, W3, W4, W5, W0); + R2(b, c, d, a, f, g, h, e, 47, 2, XW); SCHED_W_2(51, W1, W2, W3, W4, W5, W0); + + /* Transform 48-50 + Precalc 54-56 */ + R2(a, b, c, d, e, f, g, h, 48, 0, XW); SCHED_W_0(54, W2, W3, W4, W5, W0, W1); + R2(d, a, b, c, h, e, f, g, 49, 1, XW); SCHED_W_1(54, W2, W3, W4, W5, W0, W1); + R2(c, d, a, b, g, h, e, f, 50, 2, XW); SCHED_W_2(54, W2, W3, W4, W5, W0, W1); + + /* 
Transform 51-53 + Precalc 57-59 */ + R2(b, c, d, a, f, g, h, e, 51, 0, XW); SCHED_W_0(57, W3, W4, W5, W0, W1, W2); + R2(a, b, c, d, e, f, g, h, 52, 1, XW); SCHED_W_1(57, W3, W4, W5, W0, W1, W2); + R2(d, a, b, c, h, e, f, g, 53, 2, XW); SCHED_W_2(57, W3, W4, W5, W0, W1, W2); + + /* Transform 54-56 + Precalc 60-62 */ + R2(c, d, a, b, g, h, e, f, 54, 0, XW); SCHED_W_0(60, W4, W5, W0, W1, W2, W3); + R2(b, c, d, a, f, g, h, e, 55, 1, XW); SCHED_W_1(60, W4, W5, W0, W1, W2, W3); + R2(a, b, c, d, e, f, g, h, 56, 2, XW); SCHED_W_2(60, W4, W5, W0, W1, W2, W3); + + /* Transform 57-59 + Precalc 63 */ + R2(d, a, b, c, h, e, f, g, 57, 0, XW); SCHED_W_0(63, W5, W0, W1, W2, W3, W4); + R2(c, d, a, b, g, h, e, f, 58, 1, XW); + R2(b, c, d, a, f, g, h, e, 59, 2, XW); SCHED_W_1(63, W5, W0, W1, W2, W3, W4); + + /* Transform 60-62 + Precalc 63 */ + R2(a, b, c, d, e, f, g, h, 60, 0, XW); + R2(d, a, b, c, h, e, f, g, 61, 1, XW); SCHED_W_2(63, W5, W0, W1, W2, W3, W4); + R2(c, d, a, b, g, h, e, f, 62, 2, XW); + + /* Transform 63 */ + R2(b, c, d, a, f, g, h, e, 63, 0, XW); + + /* Update the chaining variables. 
*/ + xorl state_h0(RSTATE), a; + xorl state_h1(RSTATE), b; + xorl state_h2(RSTATE), c; + xorl state_h3(RSTATE), d; + movl a, state_h0(RSTATE); + movl b, state_h1(RSTATE); + movl c, state_h2(RSTATE); + movl d, state_h3(RSTATE); + xorl state_h4(RSTATE), e; + xorl state_h5(RSTATE), f; + xorl state_h6(RSTATE), g; + xorl state_h7(RSTATE), h; + movl e, state_h4(RSTATE); + movl f, state_h5(RSTATE); + movl g, state_h6(RSTATE); + movl h, state_h7(RSTATE); + + cmpq $0, RNBLKS; + jne .Loop; + + vzeroall; + + movq (STACK_REG_SAVE + 0 * 8)(%rsp), %rbx; + movq (STACK_REG_SAVE + 1 * 8)(%rsp), %r15; + movq (STACK_REG_SAVE + 2 * 8)(%rsp), %r14; + movq (STACK_REG_SAVE + 3 * 8)(%rsp), %r13; + movq (STACK_REG_SAVE + 4 * 8)(%rsp), %r12; + + vmovdqa %xmm0, IW_W1_ADDR(0, 0); + vmovdqa %xmm0, IW_W1W2_ADDR(0, 0); + vmovdqa %xmm0, IW_W1_ADDR(4, 0); + vmovdqa %xmm0, IW_W1W2_ADDR(4, 0); + vmovdqa %xmm0, IW_W1_ADDR(8, 0); + vmovdqa %xmm0, IW_W1W2_ADDR(8, 0); + + movq %rbp, %rsp; + popq %rbp; + ret; +SYM_FUNC_END(sm3_transform_avx) diff --git a/arch/x86/crypto/sm3_avx_glue.c b/arch/x86/crypto/sm3_avx_glue.c new file mode 100644 index 0000000000000..661b6f22ffcd8 --- /dev/null +++ b/arch/x86/crypto/sm3_avx_glue.c @@ -0,0 +1,134 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM3 Secure Hash Algorithm, AVX assembler accelerated. 
+ * specified in: https://datatracker.ietf.org/doc/html/draft-sca-cfrg-sm3-02 + * + * Copyright (C) 2021 Tianjia Zhang + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +asmlinkage void sm3_transform_avx(struct sm3_state *state, + const u8 *data, int nblocks); + +static int sm3_avx_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sm3_state *sctx = shash_desc_ctx(desc); + + if (!crypto_simd_usable() || + (sctx->count % SM3_BLOCK_SIZE) + len < SM3_BLOCK_SIZE) { + sm3_update(sctx, data, len); + return 0; + } + + /* + * Make sure struct sm3_state begins directly with the SM3 + * 256-bit internal state, as this is what the asm functions expect. + */ + BUILD_BUG_ON(offsetof(struct sm3_state, state) != 0); + + kernel_fpu_begin(); + sm3_base_do_update(desc, data, len, sm3_transform_avx); + kernel_fpu_end(); + + return 0; +} + +static int sm3_avx_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + if (!crypto_simd_usable()) { + struct sm3_state *sctx = shash_desc_ctx(desc); + + if (len) + sm3_update(sctx, data, len); + + sm3_final(sctx, out); + return 0; + } + + kernel_fpu_begin(); + if (len) + sm3_base_do_update(desc, data, len, sm3_transform_avx); + sm3_base_do_finalize(desc, sm3_transform_avx); + kernel_fpu_end(); + + return sm3_base_finish(desc, out); +} + +static int sm3_avx_final(struct shash_desc *desc, u8 *out) +{ + if (!crypto_simd_usable()) { + sm3_final(shash_desc_ctx(desc), out); + return 0; + } + + kernel_fpu_begin(); + sm3_base_do_finalize(desc, sm3_transform_avx); + kernel_fpu_end(); + + return sm3_base_finish(desc, out); +} + +static struct shash_alg sm3_avx_alg = { + .digestsize = SM3_DIGEST_SIZE, + .init = sm3_base_init, + .update = sm3_avx_update, + .final = sm3_avx_final, + .finup = sm3_avx_finup, + .descsize = sizeof(struct sm3_state), + .base = { + .cra_name = "sm3", + .cra_driver_name = "sm3-avx", + 
.cra_priority = 300, + .cra_blocksize = SM3_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static int __init sm3_avx_mod_init(void) +{ + const char *feature_name; + + if (!boot_cpu_has(X86_FEATURE_AVX)) { + pr_info("AVX instruction are not detected.\n"); + return -ENODEV; + } + + if (!boot_cpu_has(X86_FEATURE_BMI2)) { + pr_info("BMI2 instruction are not detected.\n"); + return -ENODEV; + } + + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, + &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); + return -ENODEV; + } + + return crypto_register_shash(&sm3_avx_alg); +} + +static void __exit sm3_avx_mod_exit(void) +{ + crypto_unregister_shash(&sm3_avx_alg); +} + +module_init(sm3_avx_mod_init); +module_exit(sm3_avx_mod_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Tianjia Zhang "); +MODULE_DESCRIPTION("SM3 Secure Hash Algorithm, AVX assembler accelerated"); +MODULE_ALIAS_CRYPTO("sm3"); +MODULE_ALIAS_CRYPTO("sm3-avx"); diff --git a/crypto/Kconfig b/crypto/Kconfig index b07174195e984..6dcc77e95caed 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1008,6 +1008,19 @@ config CRYPTO_SM3 http://www.oscca.gov.cn/UpFile/20101222141857786.pdf https://datatracker.ietf.org/doc/html/draft-shen-sm3-hash +config CRYPTO_SM3_AVX_X86_64 + tristate "SM3 digest algorithm (x86_64/AVX)" + depends on X86 && 64BIT + select CRYPTO_HASH + select CRYPTO_LIB_SM3 + help + SM3 secure hash function as defined by OSCCA GM/T 0004-2012 SM3. + It is part of the Chinese Commercial Cryptography suite. This is an + SM3 implementation optimized using Advanced Vector Extensions (AVX) + when available. + + If unsure, say N.
+ config CRYPTO_STREEBOG tristate "Streebog Hash Function" select CRYPTO_HASH -- GitLab From ba2c149d0812cee653a186c9cfe451699b211c91 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Fri, 7 Jan 2022 20:07:00 +0800 Subject: [PATCH 0077/1586] crypto: tcrypt - add asynchronous speed test for SM3 tcrypt supports testing of SM3 hash algorithms that use AVX instruction acceleration. In order to add the sm3 asynchronous test to the appropriate position, shift the testcase sequence number of the multi buffer backward and start from 450. Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 00149657a4bc1..82b5eef2246a2 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -2571,31 +2571,35 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) if (mode > 400 && mode < 500) break; fallthrough; case 422: + test_ahash_speed("sm3", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + fallthrough; + case 450: test_mb_ahash_speed("sha1", sec, generic_hash_speed_template, num_mb); if (mode > 400 && mode < 500) break; fallthrough; - case 423: + case 451: test_mb_ahash_speed("sha256", sec, generic_hash_speed_template, num_mb); if (mode > 400 && mode < 500) break; fallthrough; - case 424: + case 452: test_mb_ahash_speed("sha512", sec, generic_hash_speed_template, num_mb); if (mode > 400 && mode < 500) break; fallthrough; - case 425: + case 453: test_mb_ahash_speed("sm3", sec, generic_hash_speed_template, num_mb); if (mode > 400 && mode < 500) break; fallthrough; - case 426: + case 454: test_mb_ahash_speed("streebog256", sec, generic_hash_speed_template, num_mb); if (mode > 400 && mode < 500) break; fallthrough; - case 427: + case 455: test_mb_ahash_speed("streebog512", sec, generic_hash_speed_template, num_mb); if (mode > 400 && mode < 500) break; -- GitLab From 
1e1ec11d3ec3134e05d4710f4dee5f9bd05e828d Mon Sep 17 00:00:00 2001 From: Peter Gonda Date: Mon, 10 Jan 2022 13:18:37 -0800 Subject: [PATCH 0078/1586] crypto: ccp - Ensure psp_ret is always init'd in __sev_platform_init_locked() Initialize psp_ret inside of __sev_platform_init_locked() because there are many failure paths with PSP initialization that do not set __sev_do_cmd_locked(). Fixes: e423b9d75e77: ("crypto: ccp - Move SEV_INIT retry for corrupted data") Signed-off-by: Peter Gonda Reported-by: Dan Carpenter Cc: Tom Lendacky Cc: Brijesh Singh Cc: Marc Orr Cc: Herbert Xu Cc: John Allen Cc: "David S. Miller" Cc: linux-crypto@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Herbert Xu --- drivers/crypto/ccp/sev-dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index 8fd774a10edc3..6ab93dfd478a9 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -413,7 +413,7 @@ static int __sev_platform_init_locked(int *error) { struct psp_device *psp = psp_master; struct sev_device *sev; - int rc, psp_ret; + int rc, psp_ret = -1; int (*init_function)(int *error); if (!psp || !psp->sev_data) -- GitLab From 844318dfd31f7c99f6cebbdba5d6f8392c4c115c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 11 Jan 2022 10:18:06 +0300 Subject: [PATCH 0079/1586] crypto: qat - fix a signedness bug in get_service_enabled() The "ret" variable needs to be signed or there is an error message which will not be printed correctly. 
Fixes: 0cec19c761e5 ("crypto: qat - add support for compression for 4xxx") Signed-off-by: Dan Carpenter Acked-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c index 6d10edc40aca0..68d39c833332e 100644 --- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c +++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c @@ -52,7 +52,7 @@ static const char *const dev_cfg_services[] = { static int get_service_enabled(struct adf_accel_dev *accel_dev) { char services[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = {0}; - u32 ret; + int ret; ret = adf_cfg_get_param_value(accel_dev, ADF_GENERAL_SEC, ADF_SERVICES_ENABLED, services); -- GitLab From fa183433bf53ee092323005f05cb3491e4aaaa8b Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 26 Jan 2022 17:12:50 -0600 Subject: [PATCH 0080/1586] regulator: maxim,max8973: Drop Tegra specifics from example There's no need to complicate examples with a platform specific macro. It also complicates example parsing to figure out the number of interrupt cells in examples (based on the bracketing). 
Signed-off-by: Rob Herring Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220126231250.1635021-1-robh@kernel.org Signed-off-by: Mark Brown --- .../devicetree/bindings/regulator/maxim,max8973.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/regulator/maxim,max8973.yaml b/Documentation/devicetree/bindings/regulator/maxim,max8973.yaml index 35c53e27f78cc..5898dcf10f067 100644 --- a/Documentation/devicetree/bindings/regulator/maxim,max8973.yaml +++ b/Documentation/devicetree/bindings/regulator/maxim,max8973.yaml @@ -113,7 +113,7 @@ examples: }; - | - #include + #include #include i2c { @@ -123,8 +123,7 @@ examples: regulator@1b { compatible = "maxim,max77621"; reg = <0x1b>; - interrupt-parent = <&gpio>; - interrupts = ; + interrupts = <1 IRQ_TYPE_LEVEL_LOW>; regulator-always-on; regulator-boot-on; -- GitLab From 20dc69ca1023b7e4c4af3c3495aa5a91e1a9be39 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 27 Jan 2022 19:58:15 +0800 Subject: [PATCH 0081/1586] spi: Fix missing unlock on error in sp7021_spi_master_transfer_one() Add the missing unlock before return from sp7021_spi_master_transfer_one() in the error handling case. 
Fixes: f62ca4e2a863 ("spi: Add spi driver for Sunplus SP7021") Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20220127115815.3148950-1-yangyingliang@huawei.com Signed-off-by: Mark Brown --- drivers/spi/spi-sunplus-sp7021.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-sunplus-sp7021.c b/drivers/spi/spi-sunplus-sp7021.c index 627b9c3024e96..cbbb1664017eb 100644 --- a/drivers/spi/spi-sunplus-sp7021.c +++ b/drivers/spi/spi-sunplus-sp7021.c @@ -351,6 +351,7 @@ static int sp7021_spi_master_transfer_one(struct spi_controller *ctlr, struct sp if (!wait_for_completion_interruptible_timeout(&pspim->isr_done, timeout)) { dev_err(&spi->dev, "wait_for_completion err\n"); + mutex_unlock(&pspim->buf_lock); return -ETIMEDOUT; } -- GitLab From bef8c5fdf50b573351571e94525800c41d9830f2 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 8 Jan 2022 13:07:54 +0100 Subject: [PATCH 0082/1586] spi: qcom: geni: Simplify DMA setting As stated in [1], dma_set_mask() with a 64-bit mask will never fail if dev->dma_mask is non-NULL. So, if it fails, the 32 bits case will also fail for the same reason. Simplify code and remove some dead code accordingly. 
[1]: https://lkml.org/lkml/2021/6/7/398 Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/1b14e4ce91a33c16b2c655389c728071a9c9aa2e.1641643601.git.christophe.jaillet@wanadoo.fr Signed-off-by: Mark Brown --- drivers/spi/spi-geni-qcom.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c index f7d905d2a90f6..4e83cc5b445d8 100644 --- a/drivers/spi/spi-geni-qcom.c +++ b/drivers/spi/spi-geni-qcom.c @@ -898,11 +898,8 @@ static int spi_geni_probe(struct platform_device *pdev) return irq; ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)); - if (ret) { - ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); - if (ret) - return dev_err_probe(dev, ret, "could not set DMA mask\n"); - } + if (ret) + return dev_err_probe(dev, ret, "could not set DMA mask\n"); base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(base)) -- GitLab From 7291e7d686308b4a77f43225eaf1753cb20cc692 Mon Sep 17 00:00:00 2001 From: Dave Stevenson Date: Mon, 24 Jan 2022 17:01:21 -0500 Subject: [PATCH 0083/1586] regulator: rpi-panel: Register with a unique backlight name There's no reason why 2 Raspberry Pi DSI displays can't be attached to a Pi Compute Module, so the backlight names need to be unique. Use the parent dev_name. It's not as readable, but is unique. 
Signed-off-by: Dave Stevenson Signed-off-by: Detlev Casanova Link: https://lore.kernel.org/r/20220124220129.158891-2-detlev.casanova@collabora.com Signed-off-by: Mark Brown --- drivers/regulator/rpi-panel-attiny-regulator.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/regulator/rpi-panel-attiny-regulator.c b/drivers/regulator/rpi-panel-attiny-regulator.c index ee46bfbf5eee7..370b9ae363dd2 100644 --- a/drivers/regulator/rpi-panel-attiny-regulator.c +++ b/drivers/regulator/rpi-panel-attiny-regulator.c @@ -181,8 +181,7 @@ static int attiny_i2c_probe(struct i2c_client *i2c, props.type = BACKLIGHT_RAW; props.max_brightness = 0xff; - bl = devm_backlight_device_register(&i2c->dev, - "7inch-touchscreen-panel-bl", + bl = devm_backlight_device_register(&i2c->dev, dev_name(&i2c->dev), &i2c->dev, regmap, &attiny_bl, &props); if (IS_ERR(bl)) -- GitLab From 5665eee7a3800430e7dc3ef6f25722476b603186 Mon Sep 17 00:00:00 2001 From: Dave Stevenson Date: Mon, 24 Jan 2022 17:01:22 -0500 Subject: [PATCH 0084/1586] regulator: rpi-panel: Handle I2C errors/timing to the Atmel The Atmel is doing some things in the I2C ISR, during which period it will not respond to further commands. This is particularly true of the POWERON command. Increase delays appropriately, and retry should I2C errors be reported. 
Signed-off-by: Dave Stevenson Signed-off-by: Detlev Casanova Link: https://lore.kernel.org/r/20220124220129.158891-3-detlev.casanova@collabora.com Signed-off-by: Mark Brown --- .../regulator/rpi-panel-attiny-regulator.c | 56 +++++++++++++++---- 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/drivers/regulator/rpi-panel-attiny-regulator.c b/drivers/regulator/rpi-panel-attiny-regulator.c index 370b9ae363dd2..00fb69efcfa25 100644 --- a/drivers/regulator/rpi-panel-attiny-regulator.c +++ b/drivers/regulator/rpi-panel-attiny-regulator.c @@ -37,11 +37,24 @@ static const struct regmap_config attiny_regmap_config = { static int attiny_lcd_power_enable(struct regulator_dev *rdev) { unsigned int data; + int ret, i; regmap_write(rdev->regmap, REG_POWERON, 1); + msleep(80); + /* Wait for nPWRDWN to go low to indicate poweron is done. */ - regmap_read_poll_timeout(rdev->regmap, REG_PORTB, data, - data & BIT(0), 10, 1000000); + for (i = 0; i < 20; i++) { + ret = regmap_read(rdev->regmap, REG_PORTB, &data); + if (!ret) { + if (data & BIT(0)) + break; + } + usleep_range(10000, 12000); + } + usleep_range(10000, 12000); + + if (ret) + pr_err("%s: regmap_read_poll_timeout failed %d\n", __func__, ret); /* Default to the same orientation as the closed source * firmware used for the panel. 
Runtime rotation @@ -57,23 +70,34 @@ static int attiny_lcd_power_disable(struct regulator_dev *rdev) { regmap_write(rdev->regmap, REG_PWM, 0); regmap_write(rdev->regmap, REG_POWERON, 0); - udelay(1); + msleep(30); return 0; } static int attiny_lcd_power_is_enabled(struct regulator_dev *rdev) { unsigned int data; - int ret; + int ret, i; - ret = regmap_read(rdev->regmap, REG_POWERON, &data); + for (i = 0; i < 10; i++) { + ret = regmap_read(rdev->regmap, REG_POWERON, &data); + if (!ret) + break; + usleep_range(10000, 12000); + } if (ret < 0) return ret; if (!(data & BIT(0))) return 0; - ret = regmap_read(rdev->regmap, REG_PORTB, &data); + for (i = 0; i < 10; i++) { + ret = regmap_read(rdev->regmap, REG_PORTB, &data); + if (!ret) + break; + usleep_range(10000, 12000); + } + if (ret < 0) return ret; @@ -103,20 +127,32 @@ static int attiny_update_status(struct backlight_device *bl) { struct regmap *regmap = bl_get_data(bl); int brightness = bl->props.brightness; + int ret, i; if (bl->props.power != FB_BLANK_UNBLANK || bl->props.fb_blank != FB_BLANK_UNBLANK) brightness = 0; - return regmap_write(regmap, REG_PWM, brightness); + for (i = 0; i < 10; i++) { + ret = regmap_write(regmap, REG_PWM, brightness); + if (!ret) + break; + } + + return ret; } static int attiny_get_brightness(struct backlight_device *bl) { struct regmap *regmap = bl_get_data(bl); - int ret, brightness; + int ret, brightness, i; + + for (i = 0; i < 10; i++) { + ret = regmap_read(regmap, REG_PWM, &brightness); + if (!ret) + break; + } - ret = regmap_read(regmap, REG_PWM, &brightness); if (ret) return ret; @@ -166,7 +202,7 @@ static int attiny_i2c_probe(struct i2c_client *i2c, } regmap_write(regmap, REG_POWERON, 0); - mdelay(1); + msleep(30); config.dev = &i2c->dev; config.regmap = regmap; -- GitLab From 00440bcd211a3cac686b730447c4efa3d4c84c2a Mon Sep 17 00:00:00 2001 From: Dave Stevenson Date: Mon, 24 Jan 2022 17:01:23 -0500 Subject: [PATCH 0085/1586] regulator: rpi-panel: Serialise operations. 
The driver was using the regmap lock to serialise the individual accesses, but we really need to protect the timings of enabling the regulators, including any communication with the Atmel. Use a mutex within the driver to control overall accesses to the Atmel, instead of the regmap lock. Signed-off-by: Dave Stevenson Signed-off-by: Detlev Casanova Link: https://lore.kernel.org/r/20220124220129.158891-4-detlev.casanova@collabora.com Signed-off-by: Mark Brown --- .../regulator/rpi-panel-attiny-regulator.c | 91 ++++++++++++++++--- 1 file changed, 80 insertions(+), 11 deletions(-) diff --git a/drivers/regulator/rpi-panel-attiny-regulator.c b/drivers/regulator/rpi-panel-attiny-regulator.c index 00fb69efcfa25..a4af7adad2b5e 100644 --- a/drivers/regulator/rpi-panel-attiny-regulator.c +++ b/drivers/regulator/rpi-panel-attiny-regulator.c @@ -27,18 +27,28 @@ #define REG_POWERON 0x85 #define REG_PWM 0x86 +struct attiny_lcd { + /* lock to serialise overall accesses to the Atmel */ + struct mutex lock; + struct regmap *regmap; +}; + static const struct regmap_config attiny_regmap_config = { .reg_bits = 8, .val_bits = 8, + .disable_locking = 1, .max_register = REG_PWM, .cache_type = REGCACHE_NONE, }; static int attiny_lcd_power_enable(struct regulator_dev *rdev) { + struct mutex *lock = rdev_get_drvdata(rdev); unsigned int data; int ret, i; + mutex_lock(lock); + regmap_write(rdev->regmap, REG_POWERON, 1); msleep(80); @@ -63,33 +73,49 @@ static int attiny_lcd_power_enable(struct regulator_dev *rdev) */ regmap_write(rdev->regmap, REG_PORTA, BIT(2)); + mutex_unlock(lock); + return 0; } static int attiny_lcd_power_disable(struct regulator_dev *rdev) { + struct mutex *lock = rdev_get_drvdata(rdev); + + mutex_lock(lock); + regmap_write(rdev->regmap, REG_PWM, 0); regmap_write(rdev->regmap, REG_POWERON, 0); msleep(30); + + mutex_unlock(lock); + return 0; } static int attiny_lcd_power_is_enabled(struct regulator_dev *rdev) { + struct mutex *lock = rdev_get_drvdata(rdev); unsigned int 
data; int ret, i; + mutex_lock(lock); + for (i = 0; i < 10; i++) { ret = regmap_read(rdev->regmap, REG_POWERON, &data); if (!ret) break; usleep_range(10000, 12000); } - if (ret < 0) + if (ret < 0) { + mutex_unlock(lock); return ret; + } - if (!(data & BIT(0))) + if (!(data & BIT(0))) { + mutex_unlock(lock); return 0; + } for (i = 0; i < 10; i++) { ret = regmap_read(rdev->regmap, REG_PORTB, &data); @@ -98,6 +124,8 @@ static int attiny_lcd_power_is_enabled(struct regulator_dev *rdev) usleep_range(10000, 12000); } + mutex_unlock(lock); + if (ret < 0) return ret; @@ -125,10 +153,13 @@ static const struct regulator_desc attiny_regulator = { static int attiny_update_status(struct backlight_device *bl) { - struct regmap *regmap = bl_get_data(bl); + struct attiny_lcd *state = bl_get_data(bl); + struct regmap *regmap = state->regmap; int brightness = bl->props.brightness; int ret, i; + mutex_lock(&state->lock); + if (bl->props.power != FB_BLANK_UNBLANK || bl->props.fb_blank != FB_BLANK_UNBLANK) brightness = 0; @@ -139,20 +170,27 @@ static int attiny_update_status(struct backlight_device *bl) break; } + mutex_unlock(&state->lock); + return ret; } static int attiny_get_brightness(struct backlight_device *bl) { - struct regmap *regmap = bl_get_data(bl); + struct attiny_lcd *state = bl_get_data(bl); + struct regmap *regmap = state->regmap; int ret, brightness, i; + mutex_lock(&state->lock); + for (i = 0; i < 10; i++) { ret = regmap_read(regmap, REG_PWM, &brightness); if (!ret) break; } + mutex_unlock(&state->lock); + if (ret) return ret; @@ -174,22 +212,30 @@ static int attiny_i2c_probe(struct i2c_client *i2c, struct regulator_config config = { }; struct backlight_device *bl; struct regulator_dev *rdev; + struct attiny_lcd *state; struct regmap *regmap; unsigned int data; int ret; + state = devm_kzalloc(&i2c->dev, sizeof(*state), GFP_KERNEL); + if (!state) + return -ENOMEM; + + mutex_init(&state->lock); + i2c_set_clientdata(i2c, state); + regmap = devm_regmap_init_i2c(i2c, 
&attiny_regmap_config); if (IS_ERR(regmap)) { ret = PTR_ERR(regmap); dev_err(&i2c->dev, "Failed to allocate register map: %d\n", ret); - return ret; + goto error; } ret = regmap_read(regmap, REG_ID, &data); if (ret < 0) { dev_err(&i2c->dev, "Failed to read REG_ID reg: %d\n", ret); - return ret; + goto error; } switch (data) { @@ -198,7 +244,8 @@ static int attiny_i2c_probe(struct i2c_client *i2c, break; default: dev_err(&i2c->dev, "Unknown Atmel firmware revision: 0x%02x\n", data); - return -ENODEV; + ret = -ENODEV; + goto error; } regmap_write(regmap, REG_POWERON, 0); @@ -208,23 +255,44 @@ static int attiny_i2c_probe(struct i2c_client *i2c, config.regmap = regmap; config.of_node = i2c->dev.of_node; config.init_data = &attiny_regulator_default; + config.driver_data = &state->lock; rdev = devm_regulator_register(&i2c->dev, &attiny_regulator, &config); if (IS_ERR(rdev)) { dev_err(&i2c->dev, "Failed to register ATTINY regulator\n"); - return PTR_ERR(rdev); + ret = PTR_ERR(rdev); + goto error; } props.type = BACKLIGHT_RAW; props.max_brightness = 0xff; + + state->regmap = regmap; + bl = devm_backlight_device_register(&i2c->dev, dev_name(&i2c->dev), - &i2c->dev, regmap, &attiny_bl, + &i2c->dev, state, &attiny_bl, &props); - if (IS_ERR(bl)) - return PTR_ERR(bl); + if (IS_ERR(bl)) { + ret = PTR_ERR(bl); + goto error; + } bl->props.brightness = 0xff; + return 0; + +error: + mutex_destroy(&state->lock); + + return ret; +} + +static int attiny_i2c_remove(struct i2c_client *client) +{ + struct attiny_lcd *state = i2c_get_clientdata(client); + + mutex_destroy(&state->lock); + return 0; } @@ -240,6 +308,7 @@ static struct i2c_driver attiny_regulator_driver = { .of_match_table = of_match_ptr(attiny_dt_ids), }, .probe = attiny_i2c_probe, + .remove = attiny_i2c_remove, }; module_i2c_driver(attiny_regulator_driver); -- GitLab From 89339a2ae7608138dbcccda8db67a87870550cbe Mon Sep 17 00:00:00 2001 From: Dave Stevenson Date: Mon, 24 Jan 2022 17:01:24 -0500 Subject: [PATCH 0086/1586] 
regulator: rpi-panel: Ensure the backlight is off during probe. The initial state of the Atmel is not defined, so ensure the backlight PWM is set to 0 by default. Signed-off-by: Dave Stevenson Signed-off-by: Detlev Casanova Link: https://lore.kernel.org/r/20220124220129.158891-5-detlev.casanova@collabora.com Signed-off-by: Mark Brown --- drivers/regulator/rpi-panel-attiny-regulator.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/regulator/rpi-panel-attiny-regulator.c b/drivers/regulator/rpi-panel-attiny-regulator.c index a4af7adad2b5e..b3629a1e0e502 100644 --- a/drivers/regulator/rpi-panel-attiny-regulator.c +++ b/drivers/regulator/rpi-panel-attiny-regulator.c @@ -250,6 +250,7 @@ static int attiny_i2c_probe(struct i2c_client *i2c, regmap_write(regmap, REG_POWERON, 0); msleep(30); + regmap_write(regmap, REG_PWM, 0); config.dev = &i2c->dev; config.regmap = regmap; -- GitLab From 4866e35e48e6bd2bef1c567b05105e7fb3493ff9 Mon Sep 17 00:00:00 2001 From: Dave Stevenson Date: Mon, 24 Jan 2022 17:01:25 -0500 Subject: [PATCH 0087/1586] regulator: rpi-panel: Convert to drive lines directly The Atmel was doing a load of automatic sequencing of control lines, however it was combining the touch controller's reset with the bridge/panel control. Change to control the control signals directly rather than through the automatic POWERON control. Signed-off-by: Dave Stevenson Signed-off-by: Detlev Casanova Link: https://lore.kernel.org/r/20220124220129.158891-6-detlev.casanova@collabora.com Signed-off-by: Mark Brown --- .../regulator/rpi-panel-attiny-regulator.c | 111 ++++++++++-------- 1 file changed, 60 insertions(+), 51 deletions(-) diff --git a/drivers/regulator/rpi-panel-attiny-regulator.c b/drivers/regulator/rpi-panel-attiny-regulator.c index b3629a1e0e502..995915ca4a9be 100644 --- a/drivers/regulator/rpi-panel-attiny-regulator.c +++ b/drivers/regulator/rpi-panel-attiny-regulator.c @@ -21,11 +21,28 @@ /* I2C registers of the Atmel microcontroller. 
*/ #define REG_ID 0x80 #define REG_PORTA 0x81 -#define REG_PORTA_HF BIT(2) -#define REG_PORTA_VF BIT(3) #define REG_PORTB 0x82 +#define REG_PORTC 0x83 #define REG_POWERON 0x85 #define REG_PWM 0x86 +#define REG_ADDR_L 0x8c +#define REG_ADDR_H 0x8d +#define REG_WRITE_DATA_H 0x90 +#define REG_WRITE_DATA_L 0x91 + +#define PA_LCD_DITHB BIT(0) +#define PA_LCD_MODE BIT(1) +#define PA_LCD_LR BIT(2) +#define PA_LCD_UD BIT(3) + +#define PB_BRIDGE_PWRDNX_N BIT(0) +#define PB_LCD_VCC_N BIT(1) +#define PB_LCD_MAIN BIT(7) + +#define PC_LED_EN BIT(0) +#define PC_RST_TP_N BIT(1) +#define PC_RST_LCD_N BIT(2) +#define PC_RST_BRIDGE_N BIT(3) struct attiny_lcd { /* lock to serialise overall accesses to the Atmel */ @@ -37,99 +54,91 @@ static const struct regmap_config attiny_regmap_config = { .reg_bits = 8, .val_bits = 8, .disable_locking = 1, - .max_register = REG_PWM, + .max_register = REG_WRITE_DATA_L, .cache_type = REGCACHE_NONE, }; static int attiny_lcd_power_enable(struct regulator_dev *rdev) { - struct mutex *lock = rdev_get_drvdata(rdev); - unsigned int data; - int ret, i; - - mutex_lock(lock); - - regmap_write(rdev->regmap, REG_POWERON, 1); - msleep(80); + struct attiny_lcd *state = rdev_get_drvdata(rdev); - /* Wait for nPWRDWN to go low to indicate poweron is done. */ - for (i = 0; i < 20; i++) { - ret = regmap_read(rdev->regmap, REG_PORTB, &data); - if (!ret) { - if (data & BIT(0)) - break; - } - usleep_range(10000, 12000); - } - usleep_range(10000, 12000); + mutex_lock(&state->lock); - if (ret) - pr_err("%s: regmap_read_poll_timeout failed %d\n", __func__, ret); + /* Ensure bridge, and tp stay in reset */ + regmap_write(rdev->regmap, REG_PORTC, 0); + usleep_range(5000, 10000); /* Default to the same orientation as the closed source * firmware used for the panel. Runtime rotation * configuration will be supported using VC4's plane * orientation bits. 
*/ - regmap_write(rdev->regmap, REG_PORTA, BIT(2)); + regmap_write(rdev->regmap, REG_PORTA, PA_LCD_LR); + usleep_range(5000, 10000); + regmap_write(rdev->regmap, REG_PORTB, PB_LCD_MAIN); + usleep_range(5000, 10000); + /* Bring controllers out of reset */ + regmap_write(rdev->regmap, REG_PORTC, + PC_LED_EN | PC_RST_BRIDGE_N | PC_RST_LCD_N | PC_RST_TP_N); + + msleep(80); + + regmap_write(rdev->regmap, REG_ADDR_H, 0x04); + usleep_range(5000, 8000); + regmap_write(rdev->regmap, REG_ADDR_L, 0x7c); + usleep_range(5000, 8000); + regmap_write(rdev->regmap, REG_WRITE_DATA_H, 0x00); + usleep_range(5000, 8000); + regmap_write(rdev->regmap, REG_WRITE_DATA_L, 0x00); + + msleep(100); - mutex_unlock(lock); + mutex_unlock(&state->lock); return 0; } static int attiny_lcd_power_disable(struct regulator_dev *rdev) { - struct mutex *lock = rdev_get_drvdata(rdev); + struct attiny_lcd *state = rdev_get_drvdata(rdev); - mutex_lock(lock); + mutex_lock(&state->lock); regmap_write(rdev->regmap, REG_PWM, 0); - regmap_write(rdev->regmap, REG_POWERON, 0); + usleep_range(5000, 10000); + regmap_write(rdev->regmap, REG_PORTA, 0); + usleep_range(5000, 10000); + regmap_write(rdev->regmap, REG_PORTB, PB_LCD_VCC_N); + usleep_range(5000, 10000); + regmap_write(rdev->regmap, REG_PORTC, 0); msleep(30); - mutex_unlock(lock); + mutex_unlock(&state->lock); return 0; } static int attiny_lcd_power_is_enabled(struct regulator_dev *rdev) { - struct mutex *lock = rdev_get_drvdata(rdev); + struct attiny_lcd *state = rdev_get_drvdata(rdev); unsigned int data; int ret, i; - mutex_lock(lock); - - for (i = 0; i < 10; i++) { - ret = regmap_read(rdev->regmap, REG_POWERON, &data); - if (!ret) - break; - usleep_range(10000, 12000); - } - if (ret < 0) { - mutex_unlock(lock); - return ret; - } - - if (!(data & BIT(0))) { - mutex_unlock(lock); - return 0; - } + mutex_lock(&state->lock); for (i = 0; i < 10; i++) { - ret = regmap_read(rdev->regmap, REG_PORTB, &data); + ret = regmap_read(rdev->regmap, REG_PORTC, &data); if 
(!ret) break; usleep_range(10000, 12000); } - mutex_unlock(lock); + mutex_unlock(&state->lock); if (ret < 0) return ret; - return data & BIT(0); + return data & PC_RST_BRIDGE_N; } static const struct regulator_init_data attiny_regulator_default = { @@ -256,7 +265,7 @@ static int attiny_i2c_probe(struct i2c_client *i2c, config.regmap = regmap; config.of_node = i2c->dev.of_node; config.init_data = &attiny_regulator_default; - config.driver_data = &state->lock; + config.driver_data = state; rdev = devm_regulator_register(&i2c->dev, &attiny_regulator, &config); if (IS_ERR(rdev)) { -- GitLab From 8c518eb4039102445b1b7bd6626aba0fef65b753 Mon Sep 17 00:00:00 2001 From: Dave Stevenson Date: Mon, 24 Jan 2022 17:01:26 -0500 Subject: [PATCH 0088/1586] regulator: rpi-panel: Add GPIO control for panel and touch resets We need independent control of the resets for the panel&bridge, vs the touch controller. Expose the reset lines that are on the Atmel's port C via the GPIO API so that they can be controlled appropriately. 
Signed-off-by: Dave Stevenson Signed-off-by: Detlev Casanova Link: https://lore.kernel.org/r/20220124220129.158891-7-detlev.casanova@collabora.com Signed-off-by: Mark Brown --- drivers/regulator/Kconfig | 1 + .../regulator/rpi-panel-attiny-regulator.c | 115 +++++++++++++++--- 2 files changed, 98 insertions(+), 18 deletions(-) diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index 1c35fed20d341..22503e4f53272 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -984,6 +984,7 @@ config REGULATOR_RASPBERRYPI_TOUCHSCREEN_ATTINY tristate "Raspberry Pi 7-inch touchscreen panel ATTINY regulator" depends on BACKLIGHT_CLASS_DEVICE depends on I2C + depends on OF_GPIO select REGMAP_I2C help This driver supports ATTINY regulator on the Raspberry Pi 7-inch diff --git a/drivers/regulator/rpi-panel-attiny-regulator.c b/drivers/regulator/rpi-panel-attiny-regulator.c index 995915ca4a9be..998233f14085e 100644 --- a/drivers/regulator/rpi-panel-attiny-regulator.c +++ b/drivers/regulator/rpi-panel-attiny-regulator.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -44,10 +45,30 @@ #define PC_RST_LCD_N BIT(2) #define PC_RST_BRIDGE_N BIT(3) +enum gpio_signals { + RST_BRIDGE_N, /* TC358762 bridge reset */ + RST_TP_N, /* Touch controller reset */ + NUM_GPIO +}; + +struct gpio_signal_mappings { + unsigned int reg; + unsigned int mask; +}; + +static const struct gpio_signal_mappings mappings[NUM_GPIO] = { + [RST_BRIDGE_N] = { REG_PORTC, PC_RST_BRIDGE_N | PC_RST_LCD_N }, + [RST_TP_N] = { REG_PORTC, PC_RST_TP_N }, +}; + struct attiny_lcd { /* lock to serialise overall accesses to the Atmel */ struct mutex lock; struct regmap *regmap; + bool gpio_states[NUM_GPIO]; + u8 port_states[3]; + + struct gpio_chip gc; }; static const struct regmap_config attiny_regmap_config = { @@ -58,6 +79,17 @@ static const struct regmap_config attiny_regmap_config = { .cache_type = REGCACHE_NONE, }; +static int attiny_set_port_state(struct 
attiny_lcd *state, int reg, u8 val) +{ + state->port_states[reg - REG_PORTA] = val; + return regmap_write(state->regmap, reg, val); +}; + +static u8 attiny_get_port_state(struct attiny_lcd *state, int reg) +{ + return state->port_states[reg - REG_PORTA]; +}; + static int attiny_lcd_power_enable(struct regulator_dev *rdev) { struct attiny_lcd *state = rdev_get_drvdata(rdev); @@ -65,7 +97,7 @@ static int attiny_lcd_power_enable(struct regulator_dev *rdev) mutex_lock(&state->lock); /* Ensure bridge, and tp stay in reset */ - regmap_write(rdev->regmap, REG_PORTC, 0); + attiny_set_port_state(state, REG_PORTC, 0); usleep_range(5000, 10000); /* Default to the same orientation as the closed source @@ -73,26 +105,16 @@ static int attiny_lcd_power_enable(struct regulator_dev *rdev) * configuration will be supported using VC4's plane * orientation bits. */ - regmap_write(rdev->regmap, REG_PORTA, PA_LCD_LR); + attiny_set_port_state(state, REG_PORTA, PA_LCD_LR); usleep_range(5000, 10000); - regmap_write(rdev->regmap, REG_PORTB, PB_LCD_MAIN); + /* Main regulator on, and power to the panel (LCD_VCC_N) */ + attiny_set_port_state(state, REG_PORTB, PB_LCD_MAIN); usleep_range(5000, 10000); /* Bring controllers out of reset */ - regmap_write(rdev->regmap, REG_PORTC, - PC_LED_EN | PC_RST_BRIDGE_N | PC_RST_LCD_N | PC_RST_TP_N); + attiny_set_port_state(state, REG_PORTC, PC_LED_EN); msleep(80); - regmap_write(rdev->regmap, REG_ADDR_H, 0x04); - usleep_range(5000, 8000); - regmap_write(rdev->regmap, REG_ADDR_L, 0x7c); - usleep_range(5000, 8000); - regmap_write(rdev->regmap, REG_WRITE_DATA_H, 0x00); - usleep_range(5000, 8000); - regmap_write(rdev->regmap, REG_WRITE_DATA_L, 0x00); - - msleep(100); - mutex_unlock(&state->lock); return 0; @@ -106,11 +128,12 @@ static int attiny_lcd_power_disable(struct regulator_dev *rdev) regmap_write(rdev->regmap, REG_PWM, 0); usleep_range(5000, 10000); - regmap_write(rdev->regmap, REG_PORTA, 0); + + attiny_set_port_state(state, REG_PORTA, 0); 
usleep_range(5000, 10000); - regmap_write(rdev->regmap, REG_PORTB, PB_LCD_VCC_N); + attiny_set_port_state(state, REG_PORTB, PB_LCD_VCC_N); usleep_range(5000, 10000); - regmap_write(rdev->regmap, REG_PORTC, 0); + attiny_set_port_state(state, REG_PORTC, 0); msleep(30); mutex_unlock(&state->lock); @@ -211,6 +234,45 @@ static const struct backlight_ops attiny_bl = { .get_brightness = attiny_get_brightness, }; +static int attiny_gpio_get_direction(struct gpio_chip *gc, unsigned int off) +{ + return GPIO_LINE_DIRECTION_OUT; +} + +static void attiny_gpio_set(struct gpio_chip *gc, unsigned int off, int val) +{ + struct attiny_lcd *state = gpiochip_get_data(gc); + u8 last_val; + + if (off >= NUM_GPIO) + return; + + mutex_lock(&state->lock); + + last_val = attiny_get_port_state(state, mappings[off].reg); + if (val) + last_val |= mappings[off].mask; + else + last_val &= ~mappings[off].mask; + + attiny_set_port_state(state, mappings[off].reg, last_val); + + if (off == RST_BRIDGE_N && val) { + usleep_range(5000, 8000); + regmap_write(state->regmap, REG_ADDR_H, 0x04); + usleep_range(5000, 8000); + regmap_write(state->regmap, REG_ADDR_L, 0x7c); + usleep_range(5000, 8000); + regmap_write(state->regmap, REG_WRITE_DATA_H, 0x00); + usleep_range(5000, 8000); + regmap_write(state->regmap, REG_WRITE_DATA_L, 0x00); + + msleep(100); + } + + mutex_unlock(&state->lock); +} + /* * I2C driver interface functions */ @@ -289,6 +351,23 @@ static int attiny_i2c_probe(struct i2c_client *i2c, bl->props.brightness = 0xff; + state->gc.parent = &i2c->dev; + state->gc.label = i2c->name; + state->gc.owner = THIS_MODULE; + state->gc.of_node = i2c->dev.of_node; + state->gc.base = -1; + state->gc.ngpio = NUM_GPIO; + + state->gc.set = attiny_gpio_set; + state->gc.get_direction = attiny_gpio_get_direction; + state->gc.can_sleep = true; + + ret = devm_gpiochip_add_data(&i2c->dev, &state->gc, state); + if (ret) { + dev_err(&i2c->dev, "Failed to create gpiochip: %d\n", ret); + goto error; + } + return 0; error: 
-- GitLab From 1d746d448f421094a71ba634399d2ee61669513f Mon Sep 17 00:00:00 2001 From: Dave Stevenson Date: Mon, 24 Jan 2022 17:01:27 -0500 Subject: [PATCH 0089/1586] regulator: rpi-panel: Remove get_brightness hook The driver was implementing a get_brightness function that tried to read back the PWM setting of the display to report as the current brightness. The controller on the display does not support that, therefore we end up reporting a brightness of 0, and that confuses systemd's backlight service. Remove the hook so that the framework returns the current brightness automatically. Signed-off-by: Dave Stevenson Signed-off-by: Detlev Casanova Link: https://lore.kernel.org/r/20220124220129.158891-8-detlev.casanova@collabora.com Signed-off-by: Mark Brown --- .../regulator/rpi-panel-attiny-regulator.c | 23 ------------------- 1 file changed, 23 deletions(-) diff --git a/drivers/regulator/rpi-panel-attiny-regulator.c b/drivers/regulator/rpi-panel-attiny-regulator.c index 998233f14085e..8090b9a485b5e 100644 --- a/drivers/regulator/rpi-panel-attiny-regulator.c +++ b/drivers/regulator/rpi-panel-attiny-regulator.c @@ -207,31 +207,8 @@ static int attiny_update_status(struct backlight_device *bl) return ret; } -static int attiny_get_brightness(struct backlight_device *bl) -{ - struct attiny_lcd *state = bl_get_data(bl); - struct regmap *regmap = state->regmap; - int ret, brightness, i; - - mutex_lock(&state->lock); - - for (i = 0; i < 10; i++) { - ret = regmap_read(regmap, REG_PWM, &brightness); - if (!ret) - break; - } - - mutex_unlock(&state->lock); - - if (ret) - return ret; - - return brightness; -} - static const struct backlight_ops attiny_bl = { .update_status = attiny_update_status, - .get_brightness = attiny_get_brightness, }; static int attiny_gpio_get_direction(struct gpio_chip *gc, unsigned int off) -- GitLab From 5fa4e8ea649009566a1b080f836ce23d4ce0c416 Mon Sep 17 00:00:00 2001 From: Dave Stevenson Date: Mon, 24 Jan 2022 17:01:28 -0500 Subject: [PATCH 
0090/1586] regulator/rpi-panel-attiny: Use the regmap cache The I2C to the Atmel is very fussy, and locks up easily on Pi0-3 particularly on reads. The LCD power status is controlled solely by this driver, so rather than reading it back from the Atmel, use the regmap cache to avoid reading values. Signed-off-by: Dave Stevenson Signed-off-by: Detlev Casanova Link: https://lore.kernel.org/r/20220124220129.158891-9-detlev.casanova@collabora.com Signed-off-by: Mark Brown --- drivers/regulator/rpi-panel-attiny-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/rpi-panel-attiny-regulator.c b/drivers/regulator/rpi-panel-attiny-regulator.c index 8090b9a485b5e..6e408a4b2c219 100644 --- a/drivers/regulator/rpi-panel-attiny-regulator.c +++ b/drivers/regulator/rpi-panel-attiny-regulator.c @@ -76,7 +76,7 @@ static const struct regmap_config attiny_regmap_config = { .val_bits = 8, .disable_locking = 1, .max_register = REG_WRITE_DATA_L, - .cache_type = REGCACHE_NONE, + .cache_type = REGCACHE_RBTREE, }; static int attiny_set_port_state(struct attiny_lcd *state, int reg, u8 val) -- GitLab From e4a7e3f741f797d93d97a153b0f6a862d19a1304 Mon Sep 17 00:00:00 2001 From: Dave Stevenson Date: Mon, 24 Jan 2022 17:01:29 -0500 Subject: [PATCH 0091/1586] regulator/rpi-panel-attiny: Use two transactions for I2C read The I2C to the Atmel is very fussy, and locks up easily on Pi0-3 particularly on reads. If running at 100kHz on Pi3, reading the ID register generally locks up the Atmel, but splitting the register select write and read into two transactions is reliable. 
Signed-off-by: Dave Stevenson Signed-off-by: Detlev Casanova Link: https://lore.kernel.org/r/20220124220129.158891-10-detlev.casanova@collabora.com Signed-off-by: Mark Brown --- .../regulator/rpi-panel-attiny-regulator.c | 35 ++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/drivers/regulator/rpi-panel-attiny-regulator.c b/drivers/regulator/rpi-panel-attiny-regulator.c index 6e408a4b2c219..f7df0f4b2f874 100644 --- a/drivers/regulator/rpi-panel-attiny-regulator.c +++ b/drivers/regulator/rpi-panel-attiny-regulator.c @@ -250,6 +250,39 @@ static void attiny_gpio_set(struct gpio_chip *gc, unsigned int off, int val) mutex_unlock(&state->lock); } +static int attiny_i2c_read(struct i2c_client *client, u8 reg, unsigned int *buf) +{ + struct i2c_msg msgs[1]; + u8 addr_buf[1] = { reg }; + u8 data_buf[1] = { 0, }; + int ret; + + /* Write register address */ + msgs[0].addr = client->addr; + msgs[0].flags = 0; + msgs[0].len = ARRAY_SIZE(addr_buf); + msgs[0].buf = addr_buf; + + ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs)); + if (ret != ARRAY_SIZE(msgs)) + return -EIO; + + usleep_range(5000, 10000); + + /* Read data from register */ + msgs[0].addr = client->addr; + msgs[0].flags = I2C_M_RD; + msgs[0].len = 1; + msgs[0].buf = data_buf; + + ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs)); + if (ret != ARRAY_SIZE(msgs)) + return -EIO; + + *buf = data_buf[0]; + return 0; +} + /* * I2C driver interface functions */ @@ -280,7 +313,7 @@ static int attiny_i2c_probe(struct i2c_client *i2c, goto error; } - ret = regmap_read(regmap, REG_ID, &data); + ret = attiny_i2c_read(i2c, REG_ID, &data); if (ret < 0) { dev_err(&i2c->dev, "Failed to read REG_ID reg: %d\n", ret); goto error; -- GitLab From 735efea69d36347c6c7a6bf6e13f032d09c63c6f Mon Sep 17 00:00:00 2001 From: Minghao Chi Date: Wed, 12 Jan 2022 08:05:44 +0000 Subject: [PATCH 0092/1586] crypto: ccp - remove redundant ret variable Return value from ccp_crypto_enqueue_request() 
directly instead of taking this in another redundant variable. Reported-by: Zeal Robot Signed-off-by: Minghao Chi Signed-off-by: CGEL ZTE Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-crypto-aes.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/crypto/ccp/ccp-crypto-aes.c b/drivers/crypto/ccp/ccp-crypto-aes.c index e6dcd8cedd53e..bed331953ff94 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes.c +++ b/drivers/crypto/ccp/ccp-crypto-aes.c @@ -69,7 +69,6 @@ static int ccp_aes_crypt(struct skcipher_request *req, bool encrypt) struct ccp_aes_req_ctx *rctx = skcipher_request_ctx(req); struct scatterlist *iv_sg = NULL; unsigned int iv_len = 0; - int ret; if (!ctx->u.aes.key_len) return -EINVAL; @@ -104,9 +103,7 @@ static int ccp_aes_crypt(struct skcipher_request *req, bool encrypt) rctx->cmd.u.aes.src_len = req->cryptlen; rctx->cmd.u.aes.dst = req->dst; - ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd); - - return ret; + return ccp_crypto_enqueue_request(&req->base, &rctx->cmd); } static int ccp_aes_encrypt(struct skcipher_request *req) -- GitLab From ab7d88549e2f7ae116afd303f32e1950cb790a1d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 12 Jan 2022 15:05:03 +0100 Subject: [PATCH 0093/1586] hwrng: cavium - HW_RANDOM_CAVIUM should depend on ARCH_THUNDER The Cavium ThunderX Random Number Generator is only present on Cavium ThunderX SoCs, and not available as an independent PCIe endpoint. Hence add a dependency on ARCH_THUNDER, to prevent asking the user about this driver when configuring a kernel without Cavium Thunder SoC support. 
Fixes: cc2f1908c6b8f625 ("hwrng: cavium - Add Cavium HWRNG driver for ThunderX SoC.") Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/char/hw_random/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index 9704963f9d500..a087156a58186 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig @@ -401,7 +401,7 @@ config HW_RANDOM_MESON config HW_RANDOM_CAVIUM tristate "Cavium ThunderX Random Number Generator support" - depends on HW_RANDOM && PCI && ARM64 + depends on HW_RANDOM && PCI && ARCH_THUNDER default HW_RANDOM help This driver provides kernel-side support for the Random Number -- GitLab From 8fc5f2ad896b6cc8b7631d546efcf8e358872f76 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 14 Jan 2022 17:40:30 +1100 Subject: [PATCH 0094/1586] crypto: testmgr - Move crypto_simd_disabled_for_test out As testmgr is part of cryptomgr which was designed to be unloadable as a module, it shouldn't export any symbols for other crypto modules to use as that would prevent it from being unloaded. All its functionality is meant to be accessed through notifiers. The symbol crypto_simd_disabled_for_test was added to testmgr which caused it to be pinned as a module if its users were also loaded. This patch moves it out of testmgr and into crypto/algapi.c so cryptomgr can again be unloaded and replaced on demand. 
Signed-off-by: Herbert Xu Reviewed-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/algapi.c | 6 ++++++ crypto/testmgr.c | 3 --- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/crypto/algapi.c b/crypto/algapi.c index a366cb3e8aa18..9f15e11f5d73b 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include @@ -21,6 +22,11 @@ static LIST_HEAD(crypto_template_list); +#ifdef CONFIG_CRYPTO_MANAGER_EXTRA_TESTS +DEFINE_PER_CPU(bool, crypto_simd_disabled_for_test); +EXPORT_PER_CPU_SYMBOL_GPL(crypto_simd_disabled_for_test); +#endif + static inline void crypto_check_module_sig(struct module *mod) { if (fips_enabled && mod && !module_sig_ok(mod)) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 5831d4bbc64fa..3a5a3e5cb77bf 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -55,9 +55,6 @@ MODULE_PARM_DESC(noextratests, "disable expensive crypto self-tests"); static unsigned int fuzz_iterations = 100; module_param(fuzz_iterations, uint, 0644); MODULE_PARM_DESC(fuzz_iterations, "number of fuzz test iterations"); - -DEFINE_PER_CPU(bool, crypto_simd_disabled_for_test); -EXPORT_PER_CPU_SYMBOL_GPL(crypto_simd_disabled_for_test); #endif #ifdef CONFIG_CRYPTO_MANAGER_DISABLE_TESTS -- GitLab From 90be188b65f728f82bacdddb43c7899f26da7b1c Mon Sep 17 00:00:00 2001 From: Nathan Huckleberry Date: Fri, 14 Jan 2022 18:17:19 -0600 Subject: [PATCH 0095/1586] crypto: x86 - Convert to SPDX identifier Use SPDX-License-Identifier instead of a verbose license text and update external link. 
Cc: James Guilford Cc: Sean Gulley Cc: Chandramouli Narayanan Signed-off-by: Nathan Huckleberry Signed-off-by: Herbert Xu --- arch/x86/crypto/aes_ctrby8_avx-x86_64.S | 63 ++++--------------------- 1 file changed, 10 insertions(+), 53 deletions(-) diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S index c799838242a69..43852ba6e19c7 100644 --- a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S +++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S @@ -1,65 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause */ /* - * Implement AES CTR mode by8 optimization with AVX instructions. (x86_64) - * - * This is AES128/192/256 CTR mode optimization implementation. It requires - * the support of Intel(R) AESNI and AVX instructions. - * - * This work was inspired by the AES CTR mode optimization published - * in Intel Optimized IPSEC Cryptograhpic library. - * Additional information on it can be found at: - * http://downloadcenter.intel.com/Detail_Desc.aspx?agr=Y&DwnldID=22972 - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY + * AES CTR mode by8 optimization with AVX instructions. (x86_64) * * Copyright(c) 2014 Intel Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * * Contact Information: * James Guilford * Sean Gulley * Chandramouli Narayanan + */ +/* + * This is AES128/192/256 CTR mode optimization implementation. It requires + * the support of Intel(R) AESNI and AVX instructions. 
* - * BSD LICENSE - * - * Copyright(c) 2014 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * + * This work was inspired by the AES CTR mode optimization published + * in Intel Optimized IPSEC Cryptographic library. 
+ * Additional information on it can be found at: + * https://github.com/intel/intel-ipsec-mb */ #include -- GitLab From 881fc7fba6c3e7d77d608b9a50b01a89d5e0c61b Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Sat, 15 Jan 2022 10:07:14 +0000 Subject: [PATCH 0096/1586] crypto: sun8i-ss - really disable hash on A80 When adding hashes support to sun8i-ss, I have added them only on A83T. But I forgot that 0 is a valid algorithm ID, so hashes are enabled on A80 but with an incorrect ID. Anyway, even with correct IDs, hashes do not work on A80 and I cannot find why. So let's disable all of them on A80. Fixes: d9b45418a917 ("crypto: sun8i-ss - support hash algorithms") Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c index 80e89066dbd1a..319fe3279a716 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c @@ -30,6 +30,8 @@ static const struct ss_variant ss_a80_variant = { .alg_cipher = { SS_ALG_AES, SS_ALG_DES, SS_ALG_3DES, }, + .alg_hash = { SS_ID_NOTSUPP, SS_ID_NOTSUPP, SS_ID_NOTSUPP, SS_ID_NOTSUPP, + }, .op_mode = { SS_OP_ECB, SS_OP_CBC, }, .ss_clks = { -- GitLab From a88592cc27efd4ed0ceba79016eb4a3ddb90e05e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 18 Jan 2022 16:31:29 +1100 Subject: [PATCH 0097/1586] crypto: kdf - Select hmac in addition to sha256 In addition to sha256 we must also enable hmac for the kdf self-test to work. 
Reported-by: kernel test robot Fixes: 304b4acee2f0 ("crypto: kdf - select SHA-256 required...") Fixes: 026a733e6659 ("crypto: kdf - add SP800-108 counter key...") Signed-off-by: Herbert Xu --- crypto/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/crypto/Kconfig b/crypto/Kconfig index 6dcc77e95caed..fa1741bb568f9 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1861,6 +1861,7 @@ config CRYPTO_JITTERENTROPY config CRYPTO_KDF800108_CTR tristate + select CRYPTO_HMAC select CRYPTO_SHA256 config CRYPTO_USER_API -- GitLab From 163a4e7fa73c3a617217e559eb5a9b58f1878bc7 Mon Sep 17 00:00:00 2001 From: Kevin Bracey Date: Tue, 18 Jan 2022 12:23:48 +0200 Subject: [PATCH 0098/1586] lib/crc32: remove unneeded casts Casts were added in commit 8f243af42ade ("sections: fix const sections for crc32 table") to cope with the tables not being const. They are no longer required since commit f5e38b9284e1 ("lib: crc32: constify crc32 lookup table"). Signed-off-by: Kevin Bracey Signed-off-by: Herbert Xu --- lib/crc32.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/lib/crc32.c b/lib/crc32.c index 2a68dfd3b96c8..7f062a2639df4 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -194,13 +194,11 @@ u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len) #else u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len) { - return crc32_le_generic(crc, p, len, - (const u32 (*)[256])crc32table_le, CRC32_POLY_LE); + return crc32_le_generic(crc, p, len, crc32table_le, CRC32_POLY_LE); } u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len) { - return crc32_le_generic(crc, p, len, - (const u32 (*)[256])crc32ctable_le, CRC32C_POLY_LE); + return crc32_le_generic(crc, p, len, crc32ctable_le, CRC32C_POLY_LE); } #endif EXPORT_SYMBOL(crc32_le); @@ -339,8 +337,7 @@ u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) #else u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) { - return 
crc32_be_generic(crc, p, len, - (const u32 (*)[256])crc32table_be, CRC32_POLY_BE); + return crc32_be_generic(crc, p, len, crc32table_be, CRC32_POLY_BE); } #endif EXPORT_SYMBOL(crc32_be); -- GitLab From 5cb29be47d44d6090f1cdc21f439439dc43b471f Mon Sep 17 00:00:00 2001 From: Kevin Bracey Date: Tue, 18 Jan 2022 12:23:49 +0200 Subject: [PATCH 0099/1586] lib/crc32: Make crc32_be weak for arch override crc32_le and __crc32c_le can be overridden - extend this to crc32_be. Signed-off-by: Kevin Bracey Signed-off-by: Herbert Xu --- lib/crc32.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/crc32.c b/lib/crc32.c index 7f062a2639df4..5649847d0a8d0 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -206,6 +206,7 @@ EXPORT_SYMBOL(__crc32c_le); u32 __pure crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32_le); u32 __pure __crc32c_le_base(u32, unsigned char const *, size_t) __alias(__crc32c_le); +u32 __pure crc32_be_base(u32, unsigned char const *, size_t) __alias(crc32_be); /* * This multiplies the polynomials x and y modulo the given modulus. @@ -330,12 +331,12 @@ static inline u32 __pure crc32_be_generic(u32 crc, unsigned char const *p, } #if CRC_BE_BITS == 1 -u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) +u32 __pure __weak crc32_be(u32 crc, unsigned char const *p, size_t len) { return crc32_be_generic(crc, p, len, NULL, CRC32_POLY_BE); } #else -u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) +u32 __pure __weak crc32_be(u32 crc, unsigned char const *p, size_t len) { return crc32_be_generic(crc, p, len, crc32table_be, CRC32_POLY_BE); } -- GitLab From 1b3dce8b8ab30bb9f1401acefa08a47dc46f0813 Mon Sep 17 00:00:00 2001 From: Kevin Bracey Date: Tue, 18 Jan 2022 12:23:50 +0200 Subject: [PATCH 0100/1586] lib/crc32test: correct printed bytes count crc32c_le self test had a stray multiply by two inherited from the crc32_le+crc32_be test loop. 
Signed-off-by: Kevin Bracey Signed-off-by: Herbert Xu --- lib/crc32test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/crc32test.c b/lib/crc32test.c index 61ddce2cff777..9b4af79412c4a 100644 --- a/lib/crc32test.c +++ b/lib/crc32test.c @@ -675,7 +675,7 @@ static int __init crc32c_test(void) /* pre-warm the cache */ for (i = 0; i < 100; i++) { - bytes += 2*test[i].length; + bytes += test[i].length; crc ^= __crc32c_le(test[i].crc, test_buf + test[i].start, test[i].length); -- GitLab From 5f2f5eaa3e373c3a07a4f3552fe13d9cde5e23e5 Mon Sep 17 00:00:00 2001 From: Kevin Bracey Date: Tue, 18 Jan 2022 12:23:51 +0200 Subject: [PATCH 0101/1586] arm64: lib: accelerate crc32_be It makes no sense to leave crc32_be using the generic code while we only accelerate the little-endian ops. Even though the big-endian form doesn't fit as smoothly into the arm64, we can speed it up and avoid hitting the D cache. Tested on Cortex-A53. Without acceleration: crc32: CRC_LE_BITS = 64, CRC_BE BITS = 64 crc32: self tests passed, processed 225944 bytes in 192240 nsec crc32c: CRC_LE_BITS = 64 crc32c: self tests passed, processed 112972 bytes in 21360 nsec With acceleration: crc32: CRC_LE_BITS = 64, CRC_BE BITS = 64 crc32: self tests passed, processed 225944 bytes in 53480 nsec crc32c: CRC_LE_BITS = 64 crc32c: self tests passed, processed 112972 bytes in 21480 nsec Signed-off-by: Kevin Bracey Tested-by: Ard Biesheuvel Reviewed-by: Ard Biesheuvel Acked-by: Catalin Marinas Signed-off-by: Herbert Xu --- arch/arm64/lib/crc32.S | 87 +++++++++++++++++++++++++++++++++++------- 1 file changed, 73 insertions(+), 14 deletions(-) diff --git a/arch/arm64/lib/crc32.S b/arch/arm64/lib/crc32.S index 0f9e10ecda231..8340dccff46ff 100644 --- a/arch/arm64/lib/crc32.S +++ b/arch/arm64/lib/crc32.S @@ -11,7 +11,44 @@ .arch armv8-a+crc - .macro __crc32, c + .macro byteorder, reg, be + .if \be +CPU_LE( rev \reg, \reg ) + .else +CPU_BE( rev \reg, \reg ) + .endif + .endm + + .macro byteorder16, reg, 
be + .if \be +CPU_LE( rev16 \reg, \reg ) + .else +CPU_BE( rev16 \reg, \reg ) + .endif + .endm + + .macro bitorder, reg, be + .if \be + rbit \reg, \reg + .endif + .endm + + .macro bitorder16, reg, be + .if \be + rbit \reg, \reg + lsr \reg, \reg, #16 + .endif + .endm + + .macro bitorder8, reg, be + .if \be + rbit \reg, \reg + lsr \reg, \reg, #24 + .endif + .endm + + .macro __crc32, c, be=0 + bitorder w0, \be cmp x2, #16 b.lt 8f // less than 16 bytes @@ -24,10 +61,14 @@ add x8, x8, x1 add x1, x1, x7 ldp x5, x6, [x8] -CPU_BE( rev x3, x3 ) -CPU_BE( rev x4, x4 ) -CPU_BE( rev x5, x5 ) -CPU_BE( rev x6, x6 ) + byteorder x3, \be + byteorder x4, \be + byteorder x5, \be + byteorder x6, \be + bitorder x3, \be + bitorder x4, \be + bitorder x5, \be + bitorder x6, \be tst x7, #8 crc32\c\()x w8, w0, x3 @@ -55,33 +96,43 @@ CPU_BE( rev x6, x6 ) 32: ldp x3, x4, [x1], #32 sub x2, x2, #32 ldp x5, x6, [x1, #-16] -CPU_BE( rev x3, x3 ) -CPU_BE( rev x4, x4 ) -CPU_BE( rev x5, x5 ) -CPU_BE( rev x6, x6 ) + byteorder x3, \be + byteorder x4, \be + byteorder x5, \be + byteorder x6, \be + bitorder x3, \be + bitorder x4, \be + bitorder x5, \be + bitorder x6, \be crc32\c\()x w0, w0, x3 crc32\c\()x w0, w0, x4 crc32\c\()x w0, w0, x5 crc32\c\()x w0, w0, x6 cbnz x2, 32b -0: ret +0: bitorder w0, \be + ret 8: tbz x2, #3, 4f ldr x3, [x1], #8 -CPU_BE( rev x3, x3 ) + byteorder x3, \be + bitorder x3, \be crc32\c\()x w0, w0, x3 4: tbz x2, #2, 2f ldr w3, [x1], #4 -CPU_BE( rev w3, w3 ) + byteorder w3, \be + bitorder w3, \be crc32\c\()w w0, w0, w3 2: tbz x2, #1, 1f ldrh w3, [x1], #2 -CPU_BE( rev16 w3, w3 ) + byteorder16 w3, \be + bitorder16 w3, \be crc32\c\()h w0, w0, w3 1: tbz x2, #0, 0f ldrb w3, [x1] + bitorder8 w3, \be crc32\c\()b w0, w0, w3 -0: ret +0: bitorder w0, \be + ret .endm .align 5 @@ -99,3 +150,11 @@ alternative_if_not ARM64_HAS_CRC32 alternative_else_nop_endif __crc32 c SYM_FUNC_END(__crc32c_le) + + .align 5 +SYM_FUNC_START(crc32_be) +alternative_if_not ARM64_HAS_CRC32 + b crc32_be_base 
+alternative_else_nop_endif + __crc32 be=1 +SYM_FUNC_END(crc32_be) -- GitLab From 642a7d49c249f04007e68c124a148847471dd476 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Tue, 18 Jan 2022 10:35:15 +0000 Subject: [PATCH 0102/1586] crypto: qat - fix access to PFVF interrupt registers for GEN4 The logic that detects, enables and disables pfvf interrupts was expecting a single CSR per VF. Instead, the source and mask register are two registers with a bit per VF. Due to this, the driver is reading and setting reserved CSRs and not masking the correct source of interrupts. Fix the access to the source and mask register for QAT GEN4 devices by removing the outer loop in adf_gen4_get_vf2pf_sources(), adf_gen4_enable_vf2pf_interrupts() and adf_gen4_disable_vf2pf_interrupts() and changing the helper macros ADF_4XXX_VM2PF_SOU and ADF_4XXX_VM2PF_MSK. Fixes: a9dc0d966605 ("crypto: qat - add PFVF support to the GEN4 host driver") Signed-off-by: Giovanni Cabiddu Co-developed-by: Siming Wan Signed-off-by: Siming Wan Reviewed-by: Xin Zeng Reviewed-by: Wojciech Ziemba Reviewed-by: Marco Chiappero Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_gen4_pfvf.c | 42 ++++--------------- 1 file changed, 9 insertions(+), 33 deletions(-) diff --git a/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c b/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c index 8efbedf63bc80..3b3ea849c5e53 100644 --- a/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c +++ b/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c @@ -9,15 +9,12 @@ #include "adf_pfvf_pf_proto.h" #include "adf_pfvf_utils.h" -#define ADF_4XXX_MAX_NUM_VFS 16 - #define ADF_4XXX_PF2VM_OFFSET(i) (0x40B010 + ((i) * 0x20)) #define ADF_4XXX_VM2PF_OFFSET(i) (0x40B014 + ((i) * 0x20)) /* VF2PF interrupt source registers */ -#define ADF_4XXX_VM2PF_SOU(i) (0x41A180 + ((i) * 4)) -#define ADF_4XXX_VM2PF_MSK(i) (0x41A1C0 + ((i) * 4)) -#define ADF_4XXX_VM2PF_INT_EN_MSK BIT(0) +#define ADF_4XXX_VM2PF_SOU 0x41A180 +#define ADF_4XXX_VM2PF_MSK 0x41A1C0 
#define ADF_PFVF_GEN4_MSGTYPE_SHIFT 2 #define ADF_PFVF_GEN4_MSGTYPE_MASK 0x3F @@ -41,51 +38,30 @@ static u32 adf_gen4_pf_get_vf2pf_offset(u32 i) static u32 adf_gen4_get_vf2pf_sources(void __iomem *pmisc_addr) { - int i; u32 sou, mask; - int num_csrs = ADF_4XXX_MAX_NUM_VFS; - u32 vf_mask = 0; - for (i = 0; i < num_csrs; i++) { - sou = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_SOU(i)); - mask = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK(i)); - sou &= ~mask; - vf_mask |= sou << i; - } + sou = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_SOU); + mask = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK); - return vf_mask; + return sou &= ~mask; } static void adf_gen4_enable_vf2pf_interrupts(void __iomem *pmisc_addr, u32 vf_mask) { - int num_csrs = ADF_4XXX_MAX_NUM_VFS; - unsigned long mask = vf_mask; unsigned int val; - int i; - - for_each_set_bit(i, &mask, num_csrs) { - unsigned int offset = ADF_4XXX_VM2PF_MSK(i); - val = ADF_CSR_RD(pmisc_addr, offset) & ~ADF_4XXX_VM2PF_INT_EN_MSK; - ADF_CSR_WR(pmisc_addr, offset, val); - } + val = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK) & ~vf_mask; + ADF_CSR_WR(pmisc_addr, ADF_4XXX_VM2PF_MSK, val); } static void adf_gen4_disable_vf2pf_interrupts(void __iomem *pmisc_addr, u32 vf_mask) { - int num_csrs = ADF_4XXX_MAX_NUM_VFS; - unsigned long mask = vf_mask; unsigned int val; - int i; - - for_each_set_bit(i, &mask, num_csrs) { - unsigned int offset = ADF_4XXX_VM2PF_MSK(i); - val = ADF_CSR_RD(pmisc_addr, offset) | ADF_4XXX_VM2PF_INT_EN_MSK; - ADF_CSR_WR(pmisc_addr, offset, val); - } + val = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK) | vf_mask; + ADF_CSR_WR(pmisc_addr, ADF_4XXX_VM2PF_MSK, val); } static int adf_gen4_pfvf_send(struct adf_accel_dev *accel_dev, -- GitLab From 9b30430ea356f237945e52f8a3a42158877bd5a9 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 18 Jan 2022 16:13:02 -0800 Subject: [PATCH 0103/1586] crypto: rsa-pkcs1pad - only allow with rsa The pkcs1pad template can be instantiated with an arbitrary akcipher algorithm, which doesn't 
make sense; it is specifically an RSA padding scheme. Make it check that the underlying algorithm really is RSA. Fixes: 3d5b1ecdea6f ("crypto: rsa - RSA padding algorithm") Cc: # v4.5+ Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/rsa-pkcs1pad.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c index 8ac3e73e8ea65..1b35457814258 100644 --- a/crypto/rsa-pkcs1pad.c +++ b/crypto/rsa-pkcs1pad.c @@ -621,6 +621,11 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb) rsa_alg = crypto_spawn_akcipher_alg(&ctx->spawn); + if (strcmp(rsa_alg->base.cra_name, "rsa") != 0) { + err = -EINVAL; + goto err_free_inst; + } + err = -ENAMETOOLONG; hash_name = crypto_attr_alg_name(tb[2]); if (IS_ERR(hash_name)) { -- GitLab From e316f7179be22912281ce6331d96d7c121fb2b17 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 18 Jan 2022 16:13:03 -0800 Subject: [PATCH 0104/1586] crypto: rsa-pkcs1pad - correctly get hash from source scatterlist Commit c7381b012872 ("crypto: akcipher - new verify API for public key algorithms") changed akcipher_alg::verify to take in both the signature and the actual hash and do the signature verification, rather than just return the hash expected by the signature as was the case before. To do this, it implemented a hack where the signature and hash are concatenated with each other in one scatterlist. Obviously, for this to work correctly, akcipher_alg::verify needs to correctly extract the two items from the scatterlist it is given. Unfortunately, it doesn't correctly extract the hash in the case where the signature is longer than the RSA key size, as it assumes that the signature's length is equal to the RSA key size. This causes a prefix of the hash, or even the entire hash, to be taken from the *signature*. (Note, the case of a signature longer than the RSA key size should not be allowed in the first place; a separate patch will fix that.) 
It is unclear whether the resulting scheme has any useful security properties. Fix this by correctly extracting the hash from the scatterlist. Fixes: c7381b012872 ("crypto: akcipher - new verify API for public key algorithms") Cc: # v5.2+ Reviewed-by: Vitaly Chikunov Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/rsa-pkcs1pad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c index 1b35457814258..7b223adebabf6 100644 --- a/crypto/rsa-pkcs1pad.c +++ b/crypto/rsa-pkcs1pad.c @@ -495,7 +495,7 @@ static int pkcs1pad_verify_complete(struct akcipher_request *req, int err) sg_nents_for_len(req->src, req->src_len + req->dst_len), req_ctx->out_buf + ctx->key_size, - req->dst_len, ctx->key_size); + req->dst_len, req->src_len); /* Do the actual verification step. */ if (memcmp(req_ctx->out_buf + ctx->key_size, out_buf + pos, req->dst_len) != 0) -- GitLab From d3481accd974541e6a5d6a1fb588924a3519c36e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 18 Jan 2022 16:13:04 -0800 Subject: [PATCH 0105/1586] crypto: rsa-pkcs1pad - restore signature length check RSA PKCS#1 v1.5 signatures are required to be the same length as the RSA key size. RFC8017 specifically requires the verifier to check this (https://datatracker.ietf.org/doc/html/rfc8017#section-8.2.2). Commit a49de377e051 ("crypto: Add hash param to pkcs1pad") changed the kernel to allow longer signatures, but didn't explain this part of the change; it seems to be unrelated to the rest of the commit. Revert this change, since it doesn't appear to be correct. We can be pretty sure that no one is relying on overly-long signatures (which would have to be front-padded with zeroes) being supported, given that they would have been broken since commit c7381b012872 ("crypto: akcipher - new verify API for public key algorithms"). 
Fixes: a49de377e051 ("crypto: Add hash param to pkcs1pad") Cc: # v4.6+ Cc: Tadeusz Struk Suggested-by: Vitaly Chikunov Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/rsa-pkcs1pad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c index 7b223adebabf6..6b556ddeb3a00 100644 --- a/crypto/rsa-pkcs1pad.c +++ b/crypto/rsa-pkcs1pad.c @@ -538,7 +538,7 @@ static int pkcs1pad_verify(struct akcipher_request *req) if (WARN_ON(req->dst) || WARN_ON(!req->dst_len) || - !ctx->key_size || req->src_len < ctx->key_size) + !ctx->key_size || req->src_len != ctx->key_size) return -EINVAL; req_ctx->out_buf = kmalloc(ctx->key_size + req->dst_len, GFP_KERNEL); -- GitLab From a24611ea356c7f3f0ec926da11b9482ac1f414fd Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 18 Jan 2022 16:13:05 -0800 Subject: [PATCH 0106/1586] crypto: rsa-pkcs1pad - fix buffer overread in pkcs1pad_verify_complete() Before checking whether the expected digest_info is present, we need to check that there are enough bytes remaining. 
Fixes: a49de377e051 ("crypto: Add hash param to pkcs1pad") Cc: # v4.6+ Cc: Tadeusz Struk Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/rsa-pkcs1pad.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c index 6b556ddeb3a00..9d804831c8b3f 100644 --- a/crypto/rsa-pkcs1pad.c +++ b/crypto/rsa-pkcs1pad.c @@ -476,6 +476,8 @@ static int pkcs1pad_verify_complete(struct akcipher_request *req, int err) pos++; if (digest_info) { + if (digest_info->size > dst_len - pos) + goto done; if (crypto_memneq(out_buf + pos, digest_info->data, digest_info->size)) goto done; -- GitLab From c2a28fdb2f4e0b1676709426d80a986dd601200e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 18 Jan 2022 16:13:06 -0800 Subject: [PATCH 0107/1586] crypto: rsa-pkcs1pad - use clearer variable names The new convention for akcipher_alg::verify makes it unclear which values are the lengths of the signature and digest. Add local variables to make it clearer what is going on. Also rename the digest_size variable in pkcs1pad_sign(), as it is actually the digest *info* size, not the digest size which is different. 
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/rsa-pkcs1pad.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c index 9d804831c8b3f..3285e3af43e14 100644 --- a/crypto/rsa-pkcs1pad.c +++ b/crypto/rsa-pkcs1pad.c @@ -385,15 +385,15 @@ static int pkcs1pad_sign(struct akcipher_request *req) struct pkcs1pad_inst_ctx *ictx = akcipher_instance_ctx(inst); const struct rsa_asn1_template *digest_info = ictx->digest_info; int err; - unsigned int ps_end, digest_size = 0; + unsigned int ps_end, digest_info_size = 0; if (!ctx->key_size) return -EINVAL; if (digest_info) - digest_size = digest_info->size; + digest_info_size = digest_info->size; - if (req->src_len + digest_size > ctx->key_size - 11) + if (req->src_len + digest_info_size > ctx->key_size - 11) return -EOVERFLOW; if (req->dst_len < ctx->key_size) { @@ -406,7 +406,7 @@ static int pkcs1pad_sign(struct akcipher_request *req) if (!req_ctx->in_buf) return -ENOMEM; - ps_end = ctx->key_size - digest_size - req->src_len - 2; + ps_end = ctx->key_size - digest_info_size - req->src_len - 2; req_ctx->in_buf[0] = 0x01; memset(req_ctx->in_buf + 1, 0xff, ps_end - 1); req_ctx->in_buf[ps_end] = 0x00; @@ -441,6 +441,8 @@ static int pkcs1pad_verify_complete(struct akcipher_request *req, int err) struct akcipher_instance *inst = akcipher_alg_instance(tfm); struct pkcs1pad_inst_ctx *ictx = akcipher_instance_ctx(inst); const struct rsa_asn1_template *digest_info = ictx->digest_info; + const unsigned int sig_size = req->src_len; + const unsigned int digest_size = req->dst_len; unsigned int dst_len; unsigned int pos; u8 *out_buf; @@ -487,20 +489,19 @@ static int pkcs1pad_verify_complete(struct akcipher_request *req, int err) err = 0; - if (req->dst_len != dst_len - pos) { + if (digest_size != dst_len - pos) { err = -EKEYREJECTED; req->dst_len = dst_len - pos; goto done; } /* Extract appended digest. 
*/ sg_pcopy_to_buffer(req->src, - sg_nents_for_len(req->src, - req->src_len + req->dst_len), + sg_nents_for_len(req->src, sig_size + digest_size), req_ctx->out_buf + ctx->key_size, - req->dst_len, req->src_len); + digest_size, sig_size); /* Do the actual verification step. */ if (memcmp(req_ctx->out_buf + ctx->key_size, out_buf + pos, - req->dst_len) != 0) + digest_size) != 0) err = -EKEYREJECTED; done: kfree_sensitive(req_ctx->out_buf); @@ -536,14 +537,15 @@ static int pkcs1pad_verify(struct akcipher_request *req) struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); struct pkcs1pad_ctx *ctx = akcipher_tfm_ctx(tfm); struct pkcs1pad_request *req_ctx = akcipher_request_ctx(req); + const unsigned int sig_size = req->src_len; + const unsigned int digest_size = req->dst_len; int err; - if (WARN_ON(req->dst) || - WARN_ON(!req->dst_len) || - !ctx->key_size || req->src_len != ctx->key_size) + if (WARN_ON(req->dst) || WARN_ON(!digest_size) || + !ctx->key_size || sig_size != ctx->key_size) return -EINVAL; - req_ctx->out_buf = kmalloc(ctx->key_size + req->dst_len, GFP_KERNEL); + req_ctx->out_buf = kmalloc(ctx->key_size + digest_size, GFP_KERNEL); if (!req_ctx->out_buf) return -ENOMEM; @@ -556,8 +558,7 @@ static int pkcs1pad_verify(struct akcipher_request *req) /* Reuse input buffer, output to a new buffer */ akcipher_request_set_crypt(&req_ctx->child_req, req->src, - req_ctx->out_sg, req->src_len, - ctx->key_size); + req_ctx->out_sg, sig_size, ctx->key_size); err = crypto_akcipher_encrypt(&req_ctx->child_req); if (err != -EINPROGRESS && err != -EBUSY) -- GitLab From 66eae850333d639fc278d6f915c6fc01499ea893 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 19 Jan 2022 17:58:40 +1100 Subject: [PATCH 0108/1586] crypto: authenc - Fix sleep in atomic context in decrypt_tail The function crypto_authenc_decrypt_tail discards its flags argument and always relies on the flags from the original request when starting its sub-request. 
This is clearly wrong as it may cause the SLEEPABLE flag to be set when it shouldn't. Fixes: 92d95ba91772 ("crypto: authenc - Convert to new AEAD interface") Reported-by: Corentin Labbe Signed-off-by: Herbert Xu Tested-by: Corentin Labbe Signed-off-by: Herbert Xu --- crypto/authenc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/authenc.c b/crypto/authenc.c index 670bf1a01d00e..17f674a7cdff5 100644 --- a/crypto/authenc.c +++ b/crypto/authenc.c @@ -253,7 +253,7 @@ static int crypto_authenc_decrypt_tail(struct aead_request *req, dst = scatterwalk_ffwd(areq_ctx->dst, req->dst, req->assoclen); skcipher_request_set_tfm(skreq, ctx->enc); - skcipher_request_set_callback(skreq, aead_request_flags(req), + skcipher_request_set_callback(skreq, flags, req->base.complete, req->base.data); skcipher_request_set_crypt(skreq, src, dst, req->cryptlen - authsize, req->iv); -- GitLab From 1c16dfbe6cd6b6ce04e2c3d0c1fa33d80b2547f9 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 19 Jan 2022 10:31:09 +0100 Subject: [PATCH 0109/1586] crypto: memneq - avoid implicit unaligned accesses The C standard does not support dereferencing pointers that are not aligned with respect to the pointed-to type, and doing so is technically undefined behavior, even if the underlying hardware supports it. This means that conditionally dereferencing such pointers based on whether CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y is not the right thing to do, and actually results in alignment faults on ARM, which are fixed up on a slow path. Instead, we should use the unaligned accessors in such cases: on architectures that don't care about alignment, they will result in identical codegen whereas, e.g., codegen on ARM will avoid doubleword loads and stores but use ordinary ones, which are able to tolerate misalignment. 
Link: https://lore.kernel.org/linux-crypto/CAHk-=wiKkdYLY0bv+nXrcJz3NH9mAqPAafX7PpW5EwVtxsEu7Q@mail.gmail.com/ Signed-off-by: Ard Biesheuvel Reviewed-by: Arnd Bergmann Reviewed-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/memneq.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/crypto/memneq.c b/crypto/memneq.c index afed1bd16aee0..fb11608b1ec1d 100644 --- a/crypto/memneq.c +++ b/crypto/memneq.c @@ -60,6 +60,7 @@ */ #include +#include #ifndef __HAVE_ARCH_CRYPTO_MEMNEQ @@ -71,7 +72,8 @@ __crypto_memneq_generic(const void *a, const void *b, size_t size) #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) while (size >= sizeof(unsigned long)) { - neq |= *(unsigned long *)a ^ *(unsigned long *)b; + neq |= get_unaligned((unsigned long *)a) ^ + get_unaligned((unsigned long *)b); OPTIMIZER_HIDE_VAR(neq); a += sizeof(unsigned long); b += sizeof(unsigned long); @@ -95,18 +97,24 @@ static inline unsigned long __crypto_memneq_16(const void *a, const void *b) #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS if (sizeof(unsigned long) == 8) { - neq |= *(unsigned long *)(a) ^ *(unsigned long *)(b); + neq |= get_unaligned((unsigned long *)a) ^ + get_unaligned((unsigned long *)b); OPTIMIZER_HIDE_VAR(neq); - neq |= *(unsigned long *)(a+8) ^ *(unsigned long *)(b+8); + neq |= get_unaligned((unsigned long *)(a + 8)) ^ + get_unaligned((unsigned long *)(b + 8)); OPTIMIZER_HIDE_VAR(neq); } else if (sizeof(unsigned int) == 4) { - neq |= *(unsigned int *)(a) ^ *(unsigned int *)(b); + neq |= get_unaligned((unsigned int *)a) ^ + get_unaligned((unsigned int *)b); OPTIMIZER_HIDE_VAR(neq); - neq |= *(unsigned int *)(a+4) ^ *(unsigned int *)(b+4); + neq |= get_unaligned((unsigned int *)(a + 4)) ^ + get_unaligned((unsigned int *)(b + 4)); OPTIMIZER_HIDE_VAR(neq); - neq |= *(unsigned int *)(a+8) ^ *(unsigned int *)(b+8); + neq |= get_unaligned((unsigned int *)(a + 8)) ^ + get_unaligned((unsigned int *)(b + 8)); OPTIMIZER_HIDE_VAR(neq); - neq |= 
*(unsigned int *)(a+12) ^ *(unsigned int *)(b+12); + neq |= get_unaligned((unsigned int *)(a + 12)) ^ + get_unaligned((unsigned int *)(b + 12)); OPTIMIZER_HIDE_VAR(neq); } else #endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ -- GitLab From 85872d1a6f38d133133784c8027d25d1c5328f4f Mon Sep 17 00:00:00 2001 From: Shijith Thotton Date: Fri, 21 Jan 2022 16:42:32 +0530 Subject: [PATCH 0110/1586] crypto: octeontx2 - select CONFIG_NET_DEVLINK OcteonTX2 CPT driver will fail to link without devlink support. aarch64-linux-gnu-ld: otx2_cpt_devlink.o: in function `otx2_cpt_dl_egrp_delete': otx2_cpt_devlink.c:18: undefined reference to `devlink_priv' aarch64-linux-gnu-ld: otx2_cpt_devlink.o: in function `otx2_cpt_dl_egrp_create': otx2_cpt_devlink.c:9: undefined reference to `devlink_priv' aarch64-linux-gnu-ld: otx2_cpt_devlink.o: in function `otx2_cpt_dl_uc_info': otx2_cpt_devlink.c:27: undefined reference to `devlink_priv' Fixes: fed8f4d5f946 ("crypto: octeontx2 - parameters for custom engine groups") Signed-off-by: Shijith Thotton Signed-off-by: Herbert Xu --- drivers/crypto/marvell/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/marvell/Kconfig b/drivers/crypto/marvell/Kconfig index 9125199f1702b..a48591af12d02 100644 --- a/drivers/crypto/marvell/Kconfig +++ b/drivers/crypto/marvell/Kconfig @@ -47,6 +47,7 @@ config CRYPTO_DEV_OCTEONTX2_CPT select CRYPTO_SKCIPHER select CRYPTO_HASH select CRYPTO_AEAD + select NET_DEVLINK help This driver allows you to utilize the Marvell Cryptographic Accelerator Unit(CPT) found in OcteonTX2 series of processors. 
-- GitLab From 60ef3dde0d6c644769e860d10443a95f1ca9a115 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Sat, 22 Jan 2022 16:13:08 +0800 Subject: [PATCH 0111/1586] crypto: hisilicon/sec - fixup icv checking enabled on Kunpeng 930 Fix the ICV (integrity check value) checking, which was enabled incorrectly on Kunpeng 930. Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec_crypto.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index 6a45bd23b3635..bf93c98473653 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -1614,7 +1614,7 @@ static void sec_auth_bd_fill_ex_v3(struct sec_auth_ctx *ctx, int dir, sqe3->auth_mac_key |= cpu_to_le32((u32)SEC_AUTH_TYPE1); sqe3->huk_iv_seq &= SEC_CIPHER_AUTH_V3; } else { - sqe3->auth_mac_key |= cpu_to_le32((u32)SEC_AUTH_TYPE1); + sqe3->auth_mac_key |= cpu_to_le32((u32)SEC_AUTH_TYPE2); sqe3->huk_iv_seq |= SEC_AUTH_CIPHER_V3; } sqe3->a_len_key = cpu_to_le32(c_req->c_len + aq->assoclen); -- GitLab From e764d81d58070e66e61fb1b972c81eb9d2ea971e Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Sat, 22 Jan 2022 16:13:09 +0800 Subject: [PATCH 0112/1586] crypto: hisilicon/sec - add some comments for soft fallback Modify the printed messages that might lead to user misunderstanding. Currently only XTS mode needs the fallback tfm when using a 192-bit key. Other algorithms do not need a soft fallback tfm, so they can return directly.
Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec_crypto.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index bf93c98473653..48abd3871c0e4 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -641,13 +641,15 @@ static int sec_skcipher_fbtfm_init(struct crypto_skcipher *tfm) struct sec_cipher_ctx *c_ctx = &ctx->c_ctx; c_ctx->fallback = false; + + /* Currently, only XTS mode need fallback tfm when using 192bit key */ if (likely(strncmp(alg, "xts", SEC_XTS_NAME_SZ))) return 0; c_ctx->fbtfm = crypto_alloc_sync_skcipher(alg, 0, CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(c_ctx->fbtfm)) { - pr_err("failed to alloc fallback tfm!\n"); + pr_err("failed to alloc xts mode fallback tfm!\n"); return PTR_ERR(c_ctx->fbtfm); } @@ -808,7 +810,7 @@ static int sec_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key, } memcpy(c_ctx->c_key, key, keylen); - if (c_ctx->fallback) { + if (c_ctx->fallback && c_ctx->fbtfm) { ret = crypto_sync_skcipher_setkey(c_ctx->fbtfm, key, keylen); if (ret) { dev_err(dev, "failed to set fallback skcipher key!\n"); @@ -2032,13 +2034,12 @@ static int sec_skcipher_soft_crypto(struct sec_ctx *ctx, struct skcipher_request *sreq, bool encrypt) { struct sec_cipher_ctx *c_ctx = &ctx->c_ctx; + SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, c_ctx->fbtfm); struct device *dev = ctx->dev; int ret; - SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, c_ctx->fbtfm); - if (!c_ctx->fbtfm) { - dev_err(dev, "failed to check fallback tfm\n"); + dev_err_ratelimited(dev, "the soft tfm isn't supported in the current system.\n"); return -EINVAL; } @@ -2256,7 +2257,6 @@ static int sec_aead_param_check(struct sec_ctx *ctx, struct sec_req *sreq) if (ctx->sec->qm.ver == QM_HW_V2) { if (unlikely(!req->cryptlen || (!sreq->c_req.encrypt && req->cryptlen <= authsize))) { - dev_err(dev, 
"Kunpeng920 not support 0 length!\n"); ctx->a_ctx.fallback = true; return -EINVAL; } -- GitLab From 5e340558c5c7f64504ef1f4d31af152f26705261 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Sat, 22 Jan 2022 16:13:10 +0800 Subject: [PATCH 0113/1586] crypto: hisilicon/sec - fix the max length of AAD for the CCM mode Fix the maximum length of AAD for the CCM mode due to the hardware limited. Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec_crypto.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index 48abd3871c0e4..8305eee9a3d34 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -63,6 +63,7 @@ #define SEC_AUTH_CIPHER 0x1 #define SEC_MAX_MAC_LEN 64 #define SEC_MAX_AAD_LEN 65535 +#define SEC_MAX_CCM_AAD_LEN 65279 #define SEC_TOTAL_MAC_SZ (SEC_MAX_MAC_LEN * QM_Q_DEPTH) #define SEC_PBUF_SZ 512 @@ -2220,6 +2221,10 @@ static int sec_aead_spec_check(struct sec_ctx *ctx, struct sec_req *sreq) } if (c_mode == SEC_CMODE_CCM) { + if (unlikely(req->assoclen > SEC_MAX_CCM_AAD_LEN)) { + dev_err_ratelimited(dev, "CCM input aad parameter is too long!\n"); + return -EINVAL; + } ret = aead_iv_demension_check(req); if (ret) { dev_err(dev, "aead input iv param error!\n"); -- GitLab From 566f060f5453e906dd4195c7e94f1fd25d2c99bb Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Sat, 22 Jan 2022 16:13:11 +0800 Subject: [PATCH 0114/1586] crypto: hisilicon/sec - fix the CTR mode BD configuration The CTR counter is 32bit rollover default on the BD. But the NIST standard is 128bit rollover. it cause the testing failed, so need to fix the BD configuration. 
Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec_crypto.c | 6 ++++++ drivers/crypto/hisilicon/sec2/sec_crypto.h | 6 ++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index 8305eee9a3d34..7013272134b2b 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -42,6 +42,8 @@ #define SEC_DE_OFFSET_V3 9 #define SEC_SCENE_OFFSET_V3 5 #define SEC_CKEY_OFFSET_V3 13 +#define SEC_CTR_CNT_OFFSET 25 +#define SEC_CTR_CNT_ROLLOVER 2 #define SEC_SRC_SGL_OFFSET_V3 11 #define SEC_DST_SGL_OFFSET_V3 14 #define SEC_CALG_OFFSET_V3 4 @@ -1303,6 +1305,10 @@ static int sec_skcipher_bd_fill_v3(struct sec_ctx *ctx, struct sec_req *req) cipher = SEC_CIPHER_DEC; sec_sqe3->c_icv_key |= cpu_to_le16(cipher); + /* Set the CTR counter mode is 128bit rollover */ + sec_sqe3->auth_mac_key = cpu_to_le32((u32)SEC_CTR_CNT_ROLLOVER << + SEC_CTR_CNT_OFFSET); + if (req->use_pbuf) { bd_param |= SEC_PBUF << SEC_SRC_SGL_OFFSET_V3; bd_param |= SEC_PBUF << SEC_DST_SGL_OFFSET_V3; diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.h b/drivers/crypto/hisilicon/sec2/sec_crypto.h index 9f71c358a6d35..5e039b50e9d4c 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.h +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.h @@ -354,8 +354,10 @@ struct sec_sqe3 { * akey_len: 9~14 bits * a_alg: 15~20 bits * key_sel: 21~24 bits - * updata_key: 25 bits - * reserved: 26~31 bits + * ctr_count_mode/sm4_xts: 25~26 bits + * sva_prefetch: 27 bits + * key_wrap_num: 28~30 bits + * update_key: 31 bits */ __le32 auth_mac_key; __le32 salt; -- GitLab From 498382593c7c90eb81111d315eeecba9508ddf58 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Sat, 22 Jan 2022 16:13:12 +0800 Subject: [PATCH 0115/1586] crypto: hisilicon/sec - use the correct print format Use the correct print format. Printing an unsigned int value should use %u instead of %d. 
Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec_crypto.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index 7013272134b2b..8caba9fd1f19c 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -240,7 +240,7 @@ static void sec_req_cb(struct hisi_qp *qp, void *resp) if (unlikely(type != type_supported)) { atomic64_inc(&dfx->err_bd_cnt); - pr_err("err bd type [%d]\n", type); + pr_err("err bd type [%u]\n", type); return; } -- GitLab From 05b3bade290d6c940701f97f3233c07cfe27205d Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Sat, 22 Jan 2022 17:30:42 +0800 Subject: [PATCH 0116/1586] crypto: hisilicon/qm - cleanup warning in qm_vf_read_qos The kernel test robot reports this warning: Uninitialized variable: ret. The code flow may return the value of ret directly while it is still uninitialized, so initialize it to fix the warning. Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index c5b84a5ea3501..3b29c8993b8c7 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -4295,7 +4295,7 @@ static void qm_vf_get_qos(struct hisi_qm *qm, u32 fun_num) static int qm_vf_read_qos(struct hisi_qm *qm) { int cnt = 0; - int ret; + int ret = -EINVAL; /* reset mailbox qos val */ qm->mb_qos = 0; -- GitLab From 28e9b6d8199a3f124682b143800c2dacdc3d70dd Mon Sep 17 00:00:00 2001 From: Tomas Paukrt Date: Sat, 22 Jan 2022 18:07:53 +0100 Subject: [PATCH 0117/1586] crypto: mxs-dcp - Fix scatterlist processing This patch fixes a bug in scatterlist processing that may cause incorrect AES block encryption/decryption.
Fixes: 2e6d793e1bf0 ("crypto: mxs-dcp - Use sg_mapping_iter to copy data") Signed-off-by: Tomas Paukrt Signed-off-by: Herbert Xu --- drivers/crypto/mxs-dcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index d19e5ffb5104b..d6f9e2fe863d7 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -331,7 +331,7 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq) memset(key + AES_KEYSIZE_128, 0, AES_KEYSIZE_128); } - for_each_sg(req->src, src, sg_nents(src), i) { + for_each_sg(req->src, src, sg_nents(req->src), i) { src_buf = sg_virt(src); len = sg_dma_len(src); tlen += len; -- GitLab From 1c4cafd11599abdbc53a520f0b6e6799d037eae1 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sun, 23 Jan 2022 10:38:52 -0800 Subject: [PATCH 0118/1586] padata: replace cpumask_weight with cpumask_empty in padata.c padata_do_parallel() calls cpumask_weight() to check if any bit of a given cpumask is set. We can do it more efficiently with cpumask_empty() because cpumask_empty() stops traversing the cpumask as soon as it finds first set bit, while cpumask_weight() counts all bits unconditionally. Signed-off-by: Yury Norov Signed-off-by: Herbert Xu --- kernel/padata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/padata.c b/kernel/padata.c index 18d3a5c699d84..e5819bb8bd1dc 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -181,7 +181,7 @@ int padata_do_parallel(struct padata_shell *ps, goto out; if (!cpumask_test_cpu(*cb_cpu, pd->cpumask.cbcpu)) { - if (!cpumask_weight(pd->cpumask.cbcpu)) + if (cpumask_empty(pd->cpumask.cbcpu)) goto out; /* Select an alternate fallback CPU and notify the caller. 
*/ -- GitLab From 31455bbda2081af83f72bb4636348b12b82c37c1 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 25 Jan 2022 01:58:36 +0100 Subject: [PATCH 0119/1586] spi: pxa2xx_spi: Convert to use GPIO descriptors This converts the PXA2xx SPI driver to use GPIO descriptors exclusively to retrieve GPIO chip select lines. The device tree and ACPI paths of the driver already use descriptors, hence ->use_gpio_descriptors is already set and this codepath is well tested. Convert all the PXA boards providing chip select GPIOs as platform data and drop the old GPIO chipselect handling in favor of the core managing it exclusively. Cc: Marek Vasut Cc: Daniel Mack Cc: Haojian Zhuang Cc: Robert Jarzmik Cc: linux-arm-kernel@lists.infradead.org Acked-by: Jonathan Cameron Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20220125005836.494807-1-linus.walleij@linaro.org Signed-off-by: Mark Brown --- Documentation/spi/pxa2xx.rst | 3 -- arch/arm/mach-pxa/corgi.c | 26 ++++++-------- arch/arm/mach-pxa/hx4700.c | 10 +++++- arch/arm/mach-pxa/icontrol.c | 26 +++++++++++--- arch/arm/mach-pxa/littleton.c | 10 +++++- arch/arm/mach-pxa/magician.c | 12 +++++-- arch/arm/mach-pxa/poodle.c | 14 +++++--- arch/arm/mach-pxa/spitz.c | 26 ++++++-------- arch/arm/mach-pxa/stargate2.c | 20 +++++++++-- arch/arm/mach-pxa/z2.c | 20 +++++++++-- drivers/spi/spi-pxa2xx.c | 63 +--------------------------------- include/linux/spi/pxa2xx_spi.h | 1 - 12 files changed, 117 insertions(+), 114 deletions(-) diff --git a/Documentation/spi/pxa2xx.rst b/Documentation/spi/pxa2xx.rst index 6347580826bef..716f65d87d048 100644 --- a/Documentation/spi/pxa2xx.rst +++ b/Documentation/spi/pxa2xx.rst @@ -101,7 +101,6 @@ device. All fields are optional. u8 rx_threshold; u8 dma_burst_size; u32 timeout; - int gpio_cs; }; The "pxa2xx_spi_chip.tx_threshold" and "pxa2xx_spi_chip.rx_threshold" fields are @@ -146,7 +145,6 @@ field. Below is a sample configuration using the PXA255 NSSP. 
.rx_threshold = 8, /* SSP hardward FIFO threshold */ .dma_burst_size = 8, /* Byte wide transfers used so 8 byte bursts */ .timeout = 235, /* See Intel documentation */ - .gpio_cs = 2, /* Use external chip select */ }; static struct pxa2xx_spi_chip cs8405a_chip_info = { @@ -154,7 +152,6 @@ field. Below is a sample configuration using the PXA255 NSSP. .rx_threshold = 8, /* SSP hardward FIFO threshold */ .dma_burst_size = 8, /* Byte wide transfers used so 8 byte bursts */ .timeout = 235, /* See Intel documentation */ - .gpio_cs = 3, /* Use external chip select */ }; static struct spi_board_info streetracer_spi_board_info[] __initdata = { diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c index 593c7f793da53..44659fbc37bab 100644 --- a/arch/arm/mach-pxa/corgi.c +++ b/arch/arm/mach-pxa/corgi.c @@ -530,6 +530,16 @@ static struct pxa2xx_spi_controller corgi_spi_info = { .num_chipselect = 3, }; +static struct gpiod_lookup_table corgi_spi_gpio_table = { + .dev_id = "pxa2xx-spi.1", + .table = { + GPIO_LOOKUP_IDX("gpio-pxa", CORGI_GPIO_ADS7846_CS, "cs", 0, GPIO_ACTIVE_LOW), + GPIO_LOOKUP_IDX("gpio-pxa", CORGI_GPIO_LCDCON_CS, "cs", 1, GPIO_ACTIVE_LOW), + GPIO_LOOKUP_IDX("gpio-pxa", CORGI_GPIO_MAX1111_CS, "cs", 2, GPIO_ACTIVE_LOW), + { }, + }, +}; + static void corgi_wait_for_hsync(void) { while (gpio_get_value(CORGI_GPIO_HSYNC)) @@ -548,10 +558,6 @@ static struct ads7846_platform_data corgi_ads7846_info = { .wait_for_sync = corgi_wait_for_hsync, }; -static struct pxa2xx_spi_chip corgi_ads7846_chip = { - .gpio_cs = CORGI_GPIO_ADS7846_CS, -}; - static void corgi_bl_kick_battery(void) { void (*kick_batt)(void); @@ -580,14 +586,6 @@ static struct corgi_lcd_platform_data corgi_lcdcon_info = { .kick_battery = corgi_bl_kick_battery, }; -static struct pxa2xx_spi_chip corgi_lcdcon_chip = { - .gpio_cs = CORGI_GPIO_LCDCON_CS, -}; - -static struct pxa2xx_spi_chip corgi_max1111_chip = { - .gpio_cs = CORGI_GPIO_MAX1111_CS, -}; - static struct spi_board_info 
corgi_spi_devices[] = { { .modalias = "ads7846", @@ -595,7 +593,6 @@ static struct spi_board_info corgi_spi_devices[] = { .bus_num = 1, .chip_select = 0, .platform_data = &corgi_ads7846_info, - .controller_data= &corgi_ads7846_chip, .irq = PXA_GPIO_TO_IRQ(CORGI_GPIO_TP_INT), }, { .modalias = "corgi-lcd", @@ -603,18 +600,17 @@ static struct spi_board_info corgi_spi_devices[] = { .bus_num = 1, .chip_select = 1, .platform_data = &corgi_lcdcon_info, - .controller_data= &corgi_lcdcon_chip, }, { .modalias = "max1111", .max_speed_hz = 450000, .bus_num = 1, .chip_select = 2, - .controller_data= &corgi_max1111_chip, }, }; static void __init corgi_init_spi(void) { + gpiod_add_lookup_table(&corgi_spi_gpio_table); pxa2xx_set_spi_info(1, &corgi_spi_info); gpiod_add_lookup_table(&corgi_lcdcon_gpio_table); spi_register_board_info(ARRAY_AND_SIZE(corgi_spi_devices)); diff --git a/arch/arm/mach-pxa/hx4700.c b/arch/arm/mach-pxa/hx4700.c index 1d4c5db54be29..e1870fbb19e7e 100644 --- a/arch/arm/mach-pxa/hx4700.c +++ b/arch/arm/mach-pxa/hx4700.c @@ -616,7 +616,6 @@ static struct pxa2xx_spi_chip tsc2046_chip = { .tx_threshold = 1, .rx_threshold = 2, .timeout = 64, - .gpio_cs = GPIO88_HX4700_TSC2046_CS, }; static struct spi_board_info tsc2046_board_info[] __initdata = { @@ -635,6 +634,14 @@ static struct pxa2xx_spi_controller pxa_ssp2_master_info = { .enable_dma = 1, }; +static struct gpiod_lookup_table pxa_ssp2_gpio_table = { + .dev_id = "pxa2xx-spi.2", + .table = { + GPIO_LOOKUP_IDX("gpio-pxa", GPIO88_HX4700_TSC2046_CS, "cs", 0, GPIO_ACTIVE_LOW), + { }, + }, +}; + /* * External power */ @@ -896,6 +903,7 @@ static void __init hx4700_init(void) pxa_set_i2c_info(NULL); i2c_register_board_info(0, ARRAY_AND_SIZE(i2c_board_info)); i2c_register_board_info(1, ARRAY_AND_SIZE(pi2c_board_info)); + gpiod_add_lookup_table(&pxa_ssp2_gpio_table); pxa2xx_set_spi_info(2, &pxa_ssp2_master_info); spi_register_board_info(ARRAY_AND_SIZE(tsc2046_board_info)); diff --git a/arch/arm/mach-pxa/icontrol.c 
b/arch/arm/mach-pxa/icontrol.c index 04a12523cdee0..753fe166ab681 100644 --- a/arch/arm/mach-pxa/icontrol.c +++ b/arch/arm/mach-pxa/icontrol.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include @@ -42,7 +42,6 @@ static struct pxa2xx_spi_chip mcp251x_chip_info1 = { .rx_threshold = 128, .dma_burst_size = 8, .timeout = 235, - .gpio_cs = ICONTROL_MCP251x_nCS1 }; static struct pxa2xx_spi_chip mcp251x_chip_info2 = { @@ -50,7 +49,6 @@ static struct pxa2xx_spi_chip mcp251x_chip_info2 = { .rx_threshold = 128, .dma_burst_size = 8, .timeout = 235, - .gpio_cs = ICONTROL_MCP251x_nCS2 }; static struct pxa2xx_spi_chip mcp251x_chip_info3 = { @@ -58,7 +56,6 @@ static struct pxa2xx_spi_chip mcp251x_chip_info3 = { .rx_threshold = 128, .dma_burst_size = 8, .timeout = 235, - .gpio_cs = ICONTROL_MCP251x_nCS3 }; static struct pxa2xx_spi_chip mcp251x_chip_info4 = { @@ -66,7 +63,6 @@ static struct pxa2xx_spi_chip mcp251x_chip_info4 = { .rx_threshold = 128, .dma_burst_size = 8, .timeout = 235, - .gpio_cs = ICONTROL_MCP251x_nCS4 }; static const struct property_entry mcp251x_properties[] = { @@ -143,6 +139,24 @@ struct platform_device pxa_spi_ssp4 = { } }; +static struct gpiod_lookup_table pxa_ssp3_gpio_table = { + .dev_id = "pxa2xx-spi.3", + .table = { + GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS1, "cs", 0, GPIO_ACTIVE_LOW), + GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS2, "cs", 1, GPIO_ACTIVE_LOW), + { }, + }, +}; + +static struct gpiod_lookup_table pxa_ssp4_gpio_table = { + .dev_id = "pxa2xx-spi.4", + .table = { + GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS3, "cs", 0, GPIO_ACTIVE_LOW), + GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS4, "cs", 1, GPIO_ACTIVE_LOW), + { }, + }, +}; + static struct platform_device *icontrol_spi_devices[] __initdata = { &pxa_spi_ssp3, &pxa_spi_ssp4, @@ -175,6 +189,8 @@ static mfp_cfg_t mfp_can_cfg[] __initdata = { static void __init icontrol_can_init(void) { pxa3xx_mfp_config(ARRAY_AND_SIZE(mfp_can_cfg)); + 
gpiod_add_lookup_table(&pxa_ssp3_gpio_table); + gpiod_add_lookup_table(&pxa_ssp4_gpio_table); platform_add_devices(ARRAY_AND_SIZE(icontrol_spi_devices)); spi_register_board_info(ARRAY_AND_SIZE(mcp251x_board_info)); } diff --git a/arch/arm/mach-pxa/littleton.c b/arch/arm/mach-pxa/littleton.c index 793f61375ee8c..73f5953b3bb6b 100644 --- a/arch/arm/mach-pxa/littleton.c +++ b/arch/arm/mach-pxa/littleton.c @@ -195,7 +195,6 @@ static struct pxa2xx_spi_controller littleton_spi_info = { static struct pxa2xx_spi_chip littleton_tdo24m_chip = { .rx_threshold = 1, .tx_threshold = 1, - .gpio_cs = LITTLETON_GPIO_LCD_CS, }; static struct spi_board_info littleton_spi_devices[] __initdata = { @@ -208,8 +207,17 @@ static struct spi_board_info littleton_spi_devices[] __initdata = { }, }; +static struct gpiod_lookup_table littleton_spi_gpio_table = { + .dev_id = "pxa2xx-spi.2", + .table = { + GPIO_LOOKUP_IDX("gpio-pxa", LITTLETON_GPIO_LCD_CS, "cs", 0, GPIO_ACTIVE_LOW), + { }, + }, +}; + static void __init littleton_init_spi(void) { + gpiod_add_lookup_table(&littleton_spi_gpio_table); pxa2xx_set_spi_info(2, &littleton_spi_info); spi_register_board_info(ARRAY_AND_SIZE(littleton_spi_devices)); } diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c index cd9fa465b9b2a..200fd35168e05 100644 --- a/arch/arm/mach-pxa/magician.c +++ b/arch/arm/mach-pxa/magician.c @@ -938,8 +938,6 @@ struct pxa2xx_spi_chip tsc2046_chip_info = { .tx_threshold = 1, .rx_threshold = 2, .timeout = 64, - /* NOTICE must be GPIO, incompatibility with hw PXA SPI framing */ - .gpio_cs = GPIO14_MAGICIAN_TSC2046_CS, }; static struct pxa2xx_spi_controller magician_spi_info = { @@ -947,6 +945,15 @@ static struct pxa2xx_spi_controller magician_spi_info = { .enable_dma = 1, }; +static struct gpiod_lookup_table magician_spi_gpio_table = { + .dev_id = "pxa2xx-spi.2", + .table = { + /* NOTICE must be GPIO, incompatibility with hw PXA SPI framing */ + GPIO_LOOKUP_IDX("gpio-pxa", GPIO14_MAGICIAN_TSC2046_CS, 
"cs", 0, GPIO_ACTIVE_LOW), + { }, + }, +}; + static struct spi_board_info ads7846_spi_board_info[] __initdata = { { .modalias = "ads7846", @@ -1031,6 +1038,7 @@ static void __init magician_init(void) } else pr_err("LCD detection: CPLD mapping failed\n"); + gpiod_add_lookup_table(&magician_spi_gpio_table); pxa2xx_set_spi_info(2, &magician_spi_info); spi_register_board_info(ARRAY_AND_SIZE(ads7846_spi_board_info)); diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c index 3a4ecc3c8f8b6..58cfa434afdeb 100644 --- a/arch/arm/mach-pxa/poodle.c +++ b/arch/arm/mach-pxa/poodle.c @@ -197,6 +197,14 @@ static struct pxa2xx_spi_controller poodle_spi_info = { .num_chipselect = 1, }; +static struct gpiod_lookup_table poodle_spi_gpio_table = { + .dev_id = "pxa2xx-spi.1", + .table = { + GPIO_LOOKUP_IDX("gpio-pxa", POODLE_GPIO_TP_CS, "cs", 0, GPIO_ACTIVE_LOW), + { }, + }, +}; + static struct ads7846_platform_data poodle_ads7846_info = { .model = 7846, .vref_delay_usecs = 100, @@ -205,23 +213,19 @@ static struct ads7846_platform_data poodle_ads7846_info = { .gpio_pendown = POODLE_GPIO_TP_INT, }; -static struct pxa2xx_spi_chip poodle_ads7846_chip = { - .gpio_cs = POODLE_GPIO_TP_CS, -}; - static struct spi_board_info poodle_spi_devices[] = { { .modalias = "ads7846", .max_speed_hz = 10000, .bus_num = 1, .platform_data = &poodle_ads7846_info, - .controller_data= &poodle_ads7846_chip, .irq = PXA_GPIO_TO_IRQ(POODLE_GPIO_TP_INT), }, }; static void __init poodle_init_spi(void) { + gpiod_add_lookup_table(&poodle_spi_gpio_table); pxa2xx_set_spi_info(1, &poodle_spi_info); spi_register_board_info(ARRAY_AND_SIZE(poodle_spi_devices)); } diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c index 371008e9bb029..a648e7094e84e 100644 --- a/arch/arm/mach-pxa/spitz.c +++ b/arch/arm/mach-pxa/spitz.c @@ -510,10 +510,6 @@ static struct ads7846_platform_data spitz_ads7846_info = { .wait_for_sync = spitz_ads7846_wait_for_hsync, }; -static struct pxa2xx_spi_chip 
spitz_ads7846_chip = { - .gpio_cs = SPITZ_GPIO_ADS7846_CS, -}; - static void spitz_bl_kick_battery(void) { void (*kick_batt)(void); @@ -555,14 +551,6 @@ static struct corgi_lcd_platform_data spitz_lcdcon_info = { .kick_battery = spitz_bl_kick_battery, }; -static struct pxa2xx_spi_chip spitz_lcdcon_chip = { - .gpio_cs = SPITZ_GPIO_LCDCON_CS, -}; - -static struct pxa2xx_spi_chip spitz_max1111_chip = { - .gpio_cs = SPITZ_GPIO_MAX1111_CS, -}; - static struct spi_board_info spitz_spi_devices[] = { { .modalias = "ads7846", @@ -570,7 +558,6 @@ static struct spi_board_info spitz_spi_devices[] = { .bus_num = 2, .chip_select = 0, .platform_data = &spitz_ads7846_info, - .controller_data = &spitz_ads7846_chip, .irq = PXA_GPIO_TO_IRQ(SPITZ_GPIO_TP_INT), }, { .modalias = "corgi-lcd", @@ -578,13 +565,11 @@ static struct spi_board_info spitz_spi_devices[] = { .bus_num = 2, .chip_select = 1, .platform_data = &spitz_lcdcon_info, - .controller_data = &spitz_lcdcon_chip, }, { .modalias = "max1111", .max_speed_hz = 450000, .bus_num = 2, .chip_select = 2, - .controller_data = &spitz_max1111_chip, }, }; @@ -592,6 +577,16 @@ static struct pxa2xx_spi_controller spitz_spi_info = { .num_chipselect = 3, }; +static struct gpiod_lookup_table spitz_spi_gpio_table = { + .dev_id = "pxa2xx-spi.2", + .table = { + GPIO_LOOKUP_IDX("gpio-pxa", SPITZ_GPIO_ADS7846_CS, "cs", 0, GPIO_ACTIVE_LOW), + GPIO_LOOKUP_IDX("gpio-pxa", SPITZ_GPIO_LCDCON_CS, "cs", 1, GPIO_ACTIVE_LOW), + GPIO_LOOKUP_IDX("gpio-pxa", SPITZ_GPIO_MAX1111_CS, "cs", 2, GPIO_ACTIVE_LOW), + { }, + }, +}; + static void __init spitz_spi_init(void) { if (machine_is_akita()) @@ -599,6 +594,7 @@ static void __init spitz_spi_init(void) else gpiod_add_lookup_table(&spitz_lcdcon_gpio_table); + gpiod_add_lookup_table(&spitz_spi_gpio_table); pxa2xx_set_spi_info(2, &spitz_spi_info); spi_register_board_info(ARRAY_AND_SIZE(spitz_spi_devices)); } diff --git a/arch/arm/mach-pxa/stargate2.c b/arch/arm/mach-pxa/stargate2.c index 8ca02ec1d44ce..b43e2f4536a5a 
100644 --- a/arch/arm/mach-pxa/stargate2.c +++ b/arch/arm/mach-pxa/stargate2.c @@ -346,6 +346,22 @@ static struct pxa2xx_spi_controller pxa_ssp_master_2_info = { .num_chipselect = 1, }; +static struct gpiod_lookup_table pxa_ssp1_gpio_table = { + .dev_id = "pxa2xx-spi.1", + .table = { + GPIO_LOOKUP_IDX("gpio-pxa", 24, "cs", 0, GPIO_ACTIVE_LOW), + { }, + }, +}; + +static struct gpiod_lookup_table pxa_ssp3_gpio_table = { + .dev_id = "pxa2xx-spi.3", + .table = { + GPIO_LOOKUP_IDX("gpio-pxa", 39, "cs", 0, GPIO_ACTIVE_LOW), + { }, + }, +}; + /* An upcoming kernel change will scrap SFRM usage so these * drivers have been moved to use GPIOs */ static struct pxa2xx_spi_chip staccel_chip_info = { @@ -353,7 +369,6 @@ static struct pxa2xx_spi_chip staccel_chip_info = { .rx_threshold = 8, .dma_burst_size = 8, .timeout = 235, - .gpio_cs = 24, }; static struct pxa2xx_spi_chip cc2420_info = { @@ -361,7 +376,6 @@ static struct pxa2xx_spi_chip cc2420_info = { .rx_threshold = 8, .dma_burst_size = 8, .timeout = 235, - .gpio_cs = 39, }; static struct spi_board_info spi_board_info[] __initdata = { @@ -410,6 +424,8 @@ static void __init imote2_stargate2_init(void) pxa_set_btuart_info(NULL); pxa_set_stuart_info(NULL); + gpiod_add_lookup_table(&pxa_ssp1_gpio_table); + gpiod_add_lookup_table(&pxa_ssp3_gpio_table); pxa2xx_set_spi_info(1, &pxa_ssp_master_0_info); pxa2xx_set_spi_info(2, &pxa_ssp_master_1_info); pxa2xx_set_spi_info(3, &pxa_ssp_master_2_info); diff --git a/arch/arm/mach-pxa/z2.c b/arch/arm/mach-pxa/z2.c index 8e74fbb0a96e2..7eaeda2699270 100644 --- a/arch/arm/mach-pxa/z2.c +++ b/arch/arm/mach-pxa/z2.c @@ -570,7 +570,6 @@ static struct pxa2xx_spi_chip z2_lbs_chip_info = { .rx_threshold = 8, .tx_threshold = 8, .timeout = 1000, - .gpio_cs = GPIO24_ZIPITZ2_WIFI_CS, }; static struct libertas_spi_platform_data z2_lbs_pdata = { @@ -584,7 +583,6 @@ static struct pxa2xx_spi_chip lms283_chip_info = { .rx_threshold = 1, .tx_threshold = 1, .timeout = 64, - .gpio_cs = GPIO88_ZIPITZ2_LCD_CS, 
}; static struct gpiod_lookup_table lms283_gpio_table = { @@ -624,8 +622,26 @@ static struct pxa2xx_spi_controller pxa_ssp2_master_info = { .num_chipselect = 1, }; +static struct gpiod_lookup_table pxa_ssp1_gpio_table = { + .dev_id = "pxa2xx-spi.1", + .table = { + GPIO_LOOKUP_IDX("gpio-pxa", GPIO24_ZIPITZ2_WIFI_CS, "cs", 0, GPIO_ACTIVE_LOW), + { }, + }, +}; + +static struct gpiod_lookup_table pxa_ssp2_gpio_table = { + .dev_id = "pxa2xx-spi.2", + .table = { + GPIO_LOOKUP_IDX("gpio-pxa", GPIO88_ZIPITZ2_LCD_CS, "cs", 0, GPIO_ACTIVE_LOW), + { }, + }, +}; + static void __init z2_spi_init(void) { + gpiod_add_lookup_table(&pxa_ssp1_gpio_table); + gpiod_add_lookup_table(&pxa_ssp2_gpio_table); pxa2xx_set_spi_info(1, &pxa_ssp1_master_info); pxa2xx_set_spi_info(2, &pxa_ssp2_master_info); gpiod_add_lookup_table(&lms283_gpio_table); diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index e88f86274eeb0..abb9f0ffd3773 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -1163,57 +1162,6 @@ static int pxa2xx_spi_unprepare_transfer(struct spi_controller *controller) return 0; } -static void cleanup_cs(struct spi_device *spi) -{ - if (!gpio_is_valid(spi->cs_gpio)) - return; - - gpio_free(spi->cs_gpio); - spi->cs_gpio = -ENOENT; -} - -static int setup_cs(struct spi_device *spi, struct chip_data *chip, - struct pxa2xx_spi_chip *chip_info) -{ - struct driver_data *drv_data = spi_controller_get_devdata(spi->controller); - - if (chip == NULL) - return 0; - - if (chip_info == NULL) - return 0; - - if (drv_data->ssp_type == CE4100_SSP) - return 0; - - /* - * NOTE: setup() can be called multiple times, possibly with - * different chip_info, release previously requested GPIO. 
- */ - cleanup_cs(spi); - - if (gpio_is_valid(chip_info->gpio_cs)) { - int gpio = chip_info->gpio_cs; - int err; - - err = gpio_request(gpio, "SPI_CS"); - if (err) { - dev_err(&spi->dev, "failed to request chip select GPIO%d\n", gpio); - return err; - } - - err = gpio_direction_output(gpio, !(spi->mode & SPI_CS_HIGH)); - if (err) { - gpio_free(gpio); - return err; - } - - spi->cs_gpio = gpio; - } - - return 0; -} - static int setup(struct spi_device *spi) { struct pxa2xx_spi_chip *chip_info; @@ -1222,7 +1170,6 @@ static int setup(struct spi_device *spi) struct driver_data *drv_data = spi_controller_get_devdata(spi->controller); uint tx_thres, tx_hi_thres, rx_thres; - int err; switch (drv_data->ssp_type) { case QUARK_X1000_SSP: @@ -1365,21 +1312,13 @@ static int setup(struct spi_device *spi) spi_set_ctldata(spi, chip); - if (drv_data->ssp_type == CE4100_SSP) - return 0; - - err = setup_cs(spi, chip, chip_info); - if (err) - kfree(chip); - - return err; + return 0; } static void cleanup(struct spi_device *spi) { struct chip_data *chip = spi_get_ctldata(spi); - cleanup_cs(spi); kfree(chip); } diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h index ca74dce367065..4658e7801b42e 100644 --- a/include/linux/spi/pxa2xx_spi.h +++ b/include/linux/spi/pxa2xx_spi.h @@ -42,7 +42,6 @@ struct pxa2xx_spi_chip { u8 rx_threshold; u8 dma_burst_size; u32 timeout; - int gpio_cs; }; #if defined(CONFIG_ARCH_PXA) || defined(CONFIG_ARCH_MMP) -- GitLab From 1a5a87d541b442293dfcc2253b652bc7b7e02d09 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 22 Jan 2022 01:33:02 +0100 Subject: [PATCH 0120/1586] spi: mt65xx: Convert to GPIO descriptors The MT65xx driver was already relying on the core to get some GPIO line numbers so it can be (hopefully) trivially converted to use descriptors instead. 
Cc: Dafna Hirschfeld Cc: Mason Zhang Cc: Guenter Roeck Cc: Peter Hess Cc: Leilk Liu Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20220122003302.374304-1-linus.walleij@linaro.org Signed-off-by: Mark Brown --- drivers/spi/spi-mt65xx.c | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c index a15de10ee286a..4f49b2e93ca7b 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include @@ -605,8 +605,9 @@ static int mtk_spi_setup(struct spi_device *spi) if (!spi->controller_data) spi->controller_data = (void *)&mtk_default_chip_info; - if (mdata->dev_comp->need_pad_sel && gpio_is_valid(spi->cs_gpio)) - gpio_direction_output(spi->cs_gpio, !(spi->mode & SPI_CS_HIGH)); + if (mdata->dev_comp->need_pad_sel && spi->cs_gpiod) + /* CS de-asserted, gpiolib will handle inversion */ + gpiod_direction_output(spi->cs_gpiod, 0); return 0; } @@ -730,6 +731,7 @@ static int mtk_spi_probe(struct platform_device *pdev) master->can_dma = mtk_spi_can_dma; master->setup = mtk_spi_setup; master->set_cs_timing = mtk_spi_set_hw_cs_timing; + master->use_gpio_descriptors = true; of_id = of_match_node(mtk_spi_of_match, pdev->dev.of_node); if (!of_id) { @@ -853,25 +855,12 @@ static int mtk_spi_probe(struct platform_device *pdev) goto err_disable_runtime_pm; } - if (!master->cs_gpios && master->num_chipselect > 1) { + if (!master->cs_gpiods && master->num_chipselect > 1) { dev_err(&pdev->dev, "cs_gpios not specified and num_chipselect > 1\n"); ret = -EINVAL; goto err_disable_runtime_pm; } - - if (master->cs_gpios) { - for (i = 0; i < master->num_chipselect; i++) { - ret = devm_gpio_request(&pdev->dev, - master->cs_gpios[i], - dev_name(&pdev->dev)); - if (ret) { - dev_err(&pdev->dev, - "can't get CS GPIO %i\n", i); - goto err_disable_runtime_pm; - } - } - } } if (mdata->dev_comp->dma_ext) -- GitLab 
From 2818824ced4be5abc22c450340d548702f166d9a Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 20 Jan 2022 01:26:00 +0100 Subject: [PATCH 0121/1586] spi: mpc512x-psc: Convert to use GPIO descriptors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver is already relying on the core to provide valid GPIO numbers in spi->cs_gpio through of_spi_get_gpio_numbers(), so we can switch to letting the core use GPIO descriptors instead. The driver was assigning a local function to the custom chipselect callback, but I chose to just open code the gpiod setting instead, this is easier to read. The only platform that overrides the cs_control callback is the mpc832x_rdb. Cc: Uwe Kleine-König Cc: Anatolij Gustschin Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20220120002600.216667-1-linus.walleij@linaro.org Signed-off-by: Mark Brown --- drivers/spi/spi-mpc512x-psc.c | 46 ++++++++++++++--------------------- 1 file changed, 18 insertions(+), 28 deletions(-) diff --git a/drivers/spi/spi-mpc512x-psc.c b/drivers/spi/spi-mpc512x-psc.c index 78a9bca8cc689..8a488d8e4c1be 100644 --- a/drivers/spi/spi-mpc512x-psc.c +++ b/drivers/spi/spi-mpc512x-psc.c @@ -23,7 +23,6 @@ #include #include #include -#include #include enum { @@ -128,17 +127,28 @@ static void mpc512x_psc_spi_activate_cs(struct spi_device *spi) out_be32(psc_addr(mps, ccr), ccr); mps->bits_per_word = cs->bits_per_word; - if (mps->cs_control && gpio_is_valid(spi->cs_gpio)) - mps->cs_control(spi, (spi->mode & SPI_CS_HIGH) ? 1 : 0); + if (cs->gpiod) { + if (mps->cs_control) + /* boardfile override */ + mps->cs_control(spi, (spi->mode & SPI_CS_HIGH) ? 
1 : 0); + else + /* gpiolib will deal with the inversion */ + gpiod_set_value(spi->cs_gpiod, 1); + } } static void mpc512x_psc_spi_deactivate_cs(struct spi_device *spi) { struct mpc512x_psc_spi *mps = spi_master_get_devdata(spi->master); - if (mps->cs_control && gpio_is_valid(spi->cs_gpio)) - mps->cs_control(spi, (spi->mode & SPI_CS_HIGH) ? 0 : 1); - + if (spi->cs_gpiod) { + if (mps->cs_control) + /* boardfile override */ + mps->cs_control(spi, (spi->mode & SPI_CS_HIGH) ? 0 : 1); + else + /* gpiolib will deal with the inversion */ + gpiod_set_value(spi->cs_gpiod, 0); + } } /* extract and scale size field in txsz or rxsz */ @@ -373,18 +383,6 @@ static int mpc512x_psc_spi_setup(struct spi_device *spi) if (!cs) return -ENOMEM; - if (gpio_is_valid(spi->cs_gpio)) { - ret = gpio_request(spi->cs_gpio, dev_name(&spi->dev)); - if (ret) { - dev_err(&spi->dev, "can't get CS gpio: %d\n", - ret); - kfree(cs); - return ret; - } - gpio_direction_output(spi->cs_gpio, - spi->mode & SPI_CS_HIGH ? 0 : 1); - } - spi->controller_state = cs; } @@ -396,8 +394,6 @@ static int mpc512x_psc_spi_setup(struct spi_device *spi) static void mpc512x_psc_spi_cleanup(struct spi_device *spi) { - if (gpio_is_valid(spi->cs_gpio)) - gpio_free(spi->cs_gpio); kfree(spi->controller_state); } @@ -476,11 +472,6 @@ static irqreturn_t mpc512x_psc_spi_isr(int irq, void *dev_id) return IRQ_NONE; } -static void mpc512x_spi_cs_control(struct spi_device *spi, bool onoff) -{ - gpio_set_value(spi->cs_gpio, onoff); -} - static int mpc512x_psc_spi_do_probe(struct device *dev, u32 regaddr, u32 size, unsigned int irq) { @@ -500,9 +491,7 @@ static int mpc512x_psc_spi_do_probe(struct device *dev, u32 regaddr, mps->type = (int)of_device_get_match_data(dev); mps->irq = irq; - if (pdata == NULL) { - mps->cs_control = mpc512x_spi_cs_control; - } else { + if (pdata) { mps->cs_control = pdata->cs_control; master->bus_num = pdata->bus_num; master->num_chipselect = pdata->max_chipselect; @@ -513,6 +502,7 @@ static int 
mpc512x_psc_spi_do_probe(struct device *dev, u32 regaddr, master->prepare_transfer_hardware = mpc512x_psc_spi_prep_xfer_hw; master->transfer_one_message = mpc512x_psc_spi_msg_xfer; master->unprepare_transfer_hardware = mpc512x_psc_spi_unprep_xfer_hw; + master->use_gpio_descriptors = true; master->cleanup = mpc512x_psc_spi_cleanup; master->dev.of_node = dev->of_node; -- GitLab From 99407f11b5657cd66625c6b55a73d38b67803a8c Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 22 Jan 2022 01:48:46 +0100 Subject: [PATCH 0122/1586] spi: pic32: Convert to use GPIO descriptors The driver already relies on the core looking up GPIO lines from the core, so this is trivial to switch over to using GPIO descriptors. Cc: Purna Chandra Mandal Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20220122004846.374930-1-linus.walleij@linaro.org Signed-off-by: Mark Brown --- drivers/spi/spi-pic32.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/spi/spi-pic32.c b/drivers/spi/spi-pic32.c index f86433b29260e..7e5c09a7d4890 100644 --- a/drivers/spi/spi-pic32.c +++ b/drivers/spi/spi-pic32.c @@ -591,18 +591,16 @@ static int pic32_spi_setup(struct spi_device *spi) * unreliable/erroneous SPI transactions. * To avoid that we will always handle /CS by toggling GPIO. 
*/ - if (!gpio_is_valid(spi->cs_gpio)) + if (!spi->cs_gpiod) return -EINVAL; - gpio_direction_output(spi->cs_gpio, !(spi->mode & SPI_CS_HIGH)); - return 0; } static void pic32_spi_cleanup(struct spi_device *spi) { - /* de-activate cs-gpio */ - gpio_direction_output(spi->cs_gpio, !(spi->mode & SPI_CS_HIGH)); + /* de-activate cs-gpio, gpiolib will handle inversion */ + gpiod_direction_output(spi->cs_gpiod, 0); } static int pic32_spi_dma_prep(struct pic32_spi *pic32s, struct device *dev) @@ -784,6 +782,7 @@ static int pic32_spi_probe(struct platform_device *pdev) master->unprepare_message = pic32_spi_unprepare_message; master->prepare_transfer_hardware = pic32_spi_prepare_hardware; master->unprepare_transfer_hardware = pic32_spi_unprepare_hardware; + master->use_gpio_descriptors = true; /* optional DMA support */ ret = pic32_spi_dma_prep(pic32s, &pdev->dev); -- GitLab From 6938e02f8658734209fc1f68dc3a6cd355f4f737 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=83=AD=E5=8A=9B=E8=B1=AA?= Date: Fri, 28 Jan 2022 22:47:41 +0800 Subject: [PATCH 0123/1586] spi: sp7201: Fix compiler warnings Fix compiler warnings for kernel test. Fixes: f62ca4e2a863 ("spi: Add spi driver for Sunplus SP7021") Signed-off-by: Li-hao Kuo Link: https://lore.kernel.org/r/CAGcXWkzM6wbhNFLbYoijq7iS_76nYVod1ySFEDu-BRgnBokEQA@mail.gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-sunplus-sp7021.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-sunplus-sp7021.c b/drivers/spi/spi-sunplus-sp7021.c index cbbb1664017eb..e5bdeb3eba458 100644 --- a/drivers/spi/spi-sunplus-sp7021.c +++ b/drivers/spi/spi-sunplus-sp7021.c @@ -124,7 +124,7 @@ static int sp7021_spi_slave_abort(struct spi_controller *ctlr) return 0; } -int sp7021_spi_slave_tx(struct spi_device *spi, struct spi_transfer *xfer) +static int sp7021_spi_slave_tx(struct spi_device *spi, struct spi_transfer *xfer) { struct sp7021_spi_ctlr *pspim = spi_controller_get_devdata(spi->controller); @@ -142,7 +142,7
@@ int sp7021_spi_slave_tx(struct spi_device *spi, struct spi_transfer *xfer) return 0; } -int sp7021_spi_slave_rx(struct spi_device *spi, struct spi_transfer *xfer) +static int sp7021_spi_slave_rx(struct spi_device *spi, struct spi_transfer *xfer) { struct sp7021_spi_ctlr *pspim = spi_controller_get_devdata(spi->controller); int ret = 0; @@ -160,7 +160,7 @@ int sp7021_spi_slave_rx(struct spi_device *spi, struct spi_transfer *xfer) return ret; } -void sp7021_spi_master_rb(struct sp7021_spi_ctlr *pspim, unsigned int len) +static void sp7021_spi_master_rb(struct sp7021_spi_ctlr *pspim, unsigned int len) { int i; @@ -171,7 +171,7 @@ void sp7021_spi_master_rb(struct sp7021_spi_ctlr *pspim, unsigned int len) } } -void sp7021_spi_master_wb(struct sp7021_spi_ctlr *pspim, unsigned int len) +static void sp7021_spi_master_wb(struct sp7021_spi_ctlr *pspim, unsigned int len) { int i; @@ -558,6 +558,7 @@ static int __maybe_unused sp7021_spi_controller_resume(struct device *dev) return clk_prepare_enable(pspim->spi_clk); } +#ifdef CONFIG_PM static int sp7021_spi_runtime_suspend(struct device *dev) { struct spi_controller *ctlr = dev_get_drvdata(dev); @@ -573,6 +574,7 @@ static int sp7021_spi_runtime_resume(struct device *dev) return reset_control_deassert(pspim->rstc); } +#endif static const struct dev_pm_ops sp7021_spi_pm_ops = { SET_RUNTIME_PM_OPS(sp7021_spi_runtime_suspend, -- GitLab From 7f99cb5e60392fc3494c610776e733b68784280c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 6 Jan 2022 11:35:37 +0100 Subject: [PATCH 0124/1586] x86/CPU/AMD: Use default_groups in kobj_type There are currently 2 ways to create a set of sysfs files for a kobj_type, through the default_attrs field, and the default_groups field. Move the AMD mce sysfs code to use default_groups field which has been the preferred way since aa30f47cf666 ("kobject: Add support for default attribute groups to kobj_type") so that the obsolete default_attrs field can be removed soon. 
Signed-off-by: Greg Kroah-Hartman Signed-off-by: Borislav Petkov Tested-by: Yazen Ghannam Link: https://lore.kernel.org/r/20220106103537.3663852-1-gregkh@linuxfoundation.org --- arch/x86/kernel/cpu/mce/amd.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 9f4b508886dde..1940d305db1c0 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -993,6 +993,7 @@ static struct attribute *default_attrs[] = { NULL, /* possibly interrupt_enable if supported, see below */ NULL, }; +ATTRIBUTE_GROUPS(default); #define to_block(k) container_of(k, struct threshold_block, kobj) #define to_attr(a) container_of(a, struct threshold_attr, attr) @@ -1029,7 +1030,7 @@ static void threshold_block_release(struct kobject *kobj); static struct kobj_type threshold_ktype = { .sysfs_ops = &threshold_ops, - .default_attrs = default_attrs, + .default_groups = default_groups, .release = threshold_block_release, }; @@ -1101,10 +1102,10 @@ static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb b->threshold_limit = THRESHOLD_MAX; if (b->interrupt_capable) { - threshold_ktype.default_attrs[2] = &interrupt_enable.attr; + default_attrs[2] = &interrupt_enable.attr; b->interrupt_enable = 1; } else { - threshold_ktype.default_attrs[2] = NULL; + default_attrs[2] = NULL; } INIT_LIST_HEAD(&b->miscj); -- GitLab From 0dcab41d3487acadf64d0667398e032341bd9918 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 31 Jan 2022 15:01:07 -0800 Subject: [PATCH 0125/1586] x86/cpu: Merge Intel and AMD ppin_init() functions The code to decide whether a system supports the PPIN (Protected Processor Inventory Number) MSR was cloned from the Intel implementation. Apart from the X86_FEATURE bit and the MSR numbers it is identical. Merge the two functions into common x86 code, but use x86_match_cpu() instead of the switch (c->x86_model) that was used by the old Intel code. 
No functional change. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220131230111.2004669-2-tony.luck@intel.com --- arch/x86/kernel/cpu/amd.c | 30 ------------- arch/x86/kernel/cpu/common.c | 74 +++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/mce/intel.c | 42 ------------------- 3 files changed, 74 insertions(+), 72 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 4edb6f0f628c2..bad0fa4c17799 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -394,35 +394,6 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c) per_cpu(cpu_llc_id, cpu) = c->cpu_die_id = c->phys_proc_id; } -static void amd_detect_ppin(struct cpuinfo_x86 *c) -{ - unsigned long long val; - - if (!cpu_has(c, X86_FEATURE_AMD_PPIN)) - return; - - /* When PPIN is defined in CPUID, still need to check PPIN_CTL MSR */ - if (rdmsrl_safe(MSR_AMD_PPIN_CTL, &val)) - goto clear_ppin; - - /* PPIN is locked in disabled mode, clear feature bit */ - if ((val & 3UL) == 1UL) - goto clear_ppin; - - /* If PPIN is disabled, try to enable it */ - if (!(val & 2UL)) { - wrmsrl_safe(MSR_AMD_PPIN_CTL, val | 2UL); - rdmsrl_safe(MSR_AMD_PPIN_CTL, &val); - } - - /* If PPIN_EN bit is 1, return from here; otherwise fall through */ - if (val & 2UL) - return; - -clear_ppin: - clear_cpu_cap(c, X86_FEATURE_AMD_PPIN); -} - u32 amd_get_nodes_per_socket(void) { return nodes_per_socket; @@ -947,7 +918,6 @@ static void init_amd(struct cpuinfo_x86 *c) amd_detect_cmp(c); amd_get_topology(c); srat_detect_node(c); - amd_detect_ppin(c); init_amd_cacheinfo(c); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7b8382c117889..b0bd8a6b5beb4 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -88,6 +88,78 @@ EXPORT_SYMBOL_GPL(get_llc_id); /* L2 cache ID of each logical CPU */ DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_l2c_id) = BAD_APICID; +static struct ppin_info { + int feature; + int 
msr_ppin_ctl; +} ppin_info[] = { + [X86_VENDOR_INTEL] = { + .feature = X86_FEATURE_INTEL_PPIN, + .msr_ppin_ctl = MSR_PPIN_CTL, + }, + [X86_VENDOR_AMD] = { + .feature = X86_FEATURE_AMD_PPIN, + .msr_ppin_ctl = MSR_AMD_PPIN_CTL, + }, +}; + +static const struct x86_cpu_id ppin_cpuids[] = { + X86_MATCH_FEATURE(X86_FEATURE_AMD_PPIN, &ppin_info[X86_VENDOR_AMD]), + + /* Legacy models without CPUID enumeration */ + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &ppin_info[X86_VENDOR_INTEL]), + + {} +}; + +static void ppin_init(struct cpuinfo_x86 *c) +{ + const struct x86_cpu_id *id; + unsigned long long val; + struct ppin_info *info; + + id = x86_match_cpu(ppin_cpuids); + if (!id) + return; + + /* + * Testing the presence of the MSR is not enough. Need to check + * that the PPIN_CTL allows reading of the PPIN. + */ + info = (struct ppin_info *)id->driver_data; + + if (rdmsrl_safe(info->msr_ppin_ctl, &val)) + goto clear_ppin; + + if ((val & 3UL) == 1UL) { + /* PPIN locked in disabled mode */ + goto clear_ppin; + } + + /* If PPIN is disabled, try to enable */ + if (!(val & 2UL)) { + wrmsrl_safe(info->msr_ppin_ctl, val | 2UL); + rdmsrl_safe(info->msr_ppin_ctl, &val); + } + + /* Is the enable bit set? 
*/ + if (val & 2UL) { + set_cpu_cap(c, info->feature); + return; + } + +clear_ppin: + clear_cpu_cap(c, info->feature); +} + /* correctly size the local cpu masks */ void __init setup_cpu_local_masks(void) { @@ -1655,6 +1727,8 @@ static void identify_cpu(struct cpuinfo_x86 *c) c->x86_capability[i] |= boot_cpu_data.x86_capability[i]; } + ppin_init(c); + /* Init Machine Check Exception if available. */ mcheck_cpu_init(c); diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index baafbb37be678..95275a5e57e06 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -470,47 +470,6 @@ void intel_clear_lmce(void) wrmsrl(MSR_IA32_MCG_EXT_CTL, val); } -static void intel_ppin_init(struct cpuinfo_x86 *c) -{ - unsigned long long val; - - /* - * Even if testing the presence of the MSR would be enough, we don't - * want to risk the situation where other models reuse this MSR for - * other purposes. - */ - switch (c->x86_model) { - case INTEL_FAM6_IVYBRIDGE_X: - case INTEL_FAM6_HASWELL_X: - case INTEL_FAM6_BROADWELL_D: - case INTEL_FAM6_BROADWELL_X: - case INTEL_FAM6_SKYLAKE_X: - case INTEL_FAM6_ICELAKE_X: - case INTEL_FAM6_ICELAKE_D: - case INTEL_FAM6_SAPPHIRERAPIDS_X: - case INTEL_FAM6_XEON_PHI_KNL: - case INTEL_FAM6_XEON_PHI_KNM: - - if (rdmsrl_safe(MSR_PPIN_CTL, &val)) - return; - - if ((val & 3UL) == 1UL) { - /* PPIN locked in disabled mode */ - return; - } - - /* If PPIN is disabled, try to enable */ - if (!(val & 2UL)) { - wrmsrl_safe(MSR_PPIN_CTL, val | 2UL); - rdmsrl_safe(MSR_PPIN_CTL, &val); - } - - /* Is the enable bit set? */ - if (val & 2UL) - set_cpu_cap(c, X86_FEATURE_INTEL_PPIN); - } -} - /* * Enable additional error logs from the integrated * memory controller on processors that support this. 
@@ -535,7 +494,6 @@ void mce_intel_feature_init(struct cpuinfo_x86 *c) { intel_init_cmci(); intel_init_lmce(); - intel_ppin_init(c); intel_imc_init(c); } -- GitLab From 00a2f23eef7d1fa6c2dfdc613857b84fbf5e2b3b Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 31 Jan 2022 15:01:08 -0800 Subject: [PATCH 0126/1586] x86/cpu: X86_FEATURE_INTEL_PPIN finally has a CPUID bit After nine generations of adding to model specific list of CPUs that support PPIN (Protected Processor Inventory Number) Intel allocated a CPUID bit to enumerate the MSRs. CPUID(EAX=7, ECX=1).EBX bit 0 enumerates presence of MSR_PPIN_CTL and MSR_PPIN. Add it to the "scattered" CPUID bits and add an entry to the ppin_cpuids[] x86_match_cpu() array to catch Intel CPUs that implement it. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220131230111.2004669-3-tony.luck@intel.com --- arch/x86/kernel/cpu/common.c | 1 + arch/x86/kernel/cpu/scattered.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b0bd8a6b5beb4..0681c69a1f09c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -104,6 +104,7 @@ static struct ppin_info { static const struct x86_cpu_id ppin_cpuids[] = { X86_MATCH_FEATURE(X86_FEATURE_AMD_PPIN, &ppin_info[X86_VENDOR_AMD]), + X86_MATCH_FEATURE(X86_FEATURE_INTEL_PPIN, &ppin_info[X86_VENDOR_INTEL]), /* Legacy models without CPUID enumeration */ X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &ppin_info[X86_VENDOR_INTEL]), diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 21d1f062895a8..4143b1e4c5c6d 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -26,6 +26,7 @@ struct cpuid_bit { static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, + { X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 }, { 
X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 }, { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 }, { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 }, -- GitLab From 822ccfade55b6be7977b364356fcf2d78d8a373a Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 31 Jan 2022 15:01:09 -0800 Subject: [PATCH 0127/1586] x86/cpu: Read/save PPIN MSR during initialization Currently, the PPIN (Protected Processor Inventory Number) MSR is read by every CPU that processes a machine check, CMCI, or just polls machine check banks from a periodic timer. This is not a "fast" MSR, so this adds to overhead of processing errors. Add a new "ppin" field to the cpuinfo_x86 structure. Read and save the PPIN during initialization. Use this copy in mce_setup() instead of reading the MSR. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220131230111.2004669-4-tony.luck@intel.com --- arch/x86/include/asm/processor.h | 2 ++ arch/x86/kernel/cpu/common.c | 4 ++++ arch/x86/kernel/cpu/mce/core.c | 7 +------ 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 2c5f12ae7d042..a87e7c33d5ac1 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -119,6 +119,8 @@ struct cpuinfo_x86 { int x86_cache_mbm_width_offset; int x86_power; unsigned long loops_per_jiffy; + /* protected processor identification number */ + u64 ppin; /* cpuid returned max cores value: */ u16 x86_max_cores; u16 apicid; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0681c69a1f09c..64deb7727d007 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -91,14 +91,17 @@ DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_l2c_id) = BAD_APICID; static struct ppin_info { int feature; int msr_ppin_ctl; + int msr_ppin; } ppin_info[] = { [X86_VENDOR_INTEL] = { .feature = X86_FEATURE_INTEL_PPIN, .msr_ppin_ctl = MSR_PPIN_CTL, + 
.msr_ppin = MSR_PPIN }, [X86_VENDOR_AMD] = { .feature = X86_FEATURE_AMD_PPIN, .msr_ppin_ctl = MSR_AMD_PPIN_CTL, + .msr_ppin = MSR_AMD_PPIN }, }; @@ -153,6 +156,7 @@ static void ppin_init(struct cpuinfo_x86 *c) /* Is the enable bit set? */ if (val & 2UL) { + c->ppin = __rdmsr(info->msr_ppin); set_cpu_cap(c, info->feature); return; } diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 5818b837fd4d4..4f1e825033ce9 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -138,12 +138,7 @@ void mce_setup(struct mce *m) m->socketid = cpu_data(m->extcpu).phys_proc_id; m->apicid = cpu_data(m->extcpu).initial_apicid; m->mcgcap = __rdmsr(MSR_IA32_MCG_CAP); - - if (this_cpu_has(X86_FEATURE_INTEL_PPIN)) - m->ppin = __rdmsr(MSR_PPIN); - else if (this_cpu_has(X86_FEATURE_AMD_PPIN)) - m->ppin = __rdmsr(MSR_AMD_PPIN); - + m->ppin = cpu_data(m->extcpu).ppin; m->microcode = boot_cpu_data.microcode; } -- GitLab From 182ecfaf757de234a5262f51c0e699bec7258a67 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 31 Jan 2022 15:01:10 -0800 Subject: [PATCH 0128/1586] topology/sysfs: Add format parameter to macro defining "show" functions for proc All the simple (non-mask and non-list) files in /sys/devices/system/cpu/cpu0/topology/ are currently printed as decimal integers. Refactor the macro that generates the "show" functions to take a format parameter to allow future files to display in other formats. No functional change.
Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20220131230111.2004669-5-tony.luck@intel.com --- drivers/base/topology.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/base/topology.c b/drivers/base/topology.c index fc24e89f9592f..044f3664f8f22 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -14,11 +14,11 @@ #include #include -#define define_id_show_func(name) \ +#define define_id_show_func(name, fmt) \ static ssize_t name##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ - return sysfs_emit(buf, "%d\n", topology_##name(dev->id)); \ + return sysfs_emit(buf, fmt "\n", topology_##name(dev->id)); \ } #define define_siblings_read_func(name, mask) \ @@ -42,20 +42,20 @@ static ssize_t name##_list_read(struct file *file, struct kobject *kobj, \ off, count); \ } -define_id_show_func(physical_package_id); +define_id_show_func(physical_package_id, "%d"); static DEVICE_ATTR_RO(physical_package_id); #ifdef TOPOLOGY_DIE_SYSFS -define_id_show_func(die_id); +define_id_show_func(die_id, "%d"); static DEVICE_ATTR_RO(die_id); #endif #ifdef TOPOLOGY_CLUSTER_SYSFS -define_id_show_func(cluster_id); +define_id_show_func(cluster_id, "%d"); static DEVICE_ATTR_RO(cluster_id); #endif -define_id_show_func(core_id); +define_id_show_func(core_id, "%d"); static DEVICE_ATTR_RO(core_id); define_siblings_read_func(thread_siblings, sibling_cpumask); @@ -87,7 +87,7 @@ static BIN_ATTR_RO(package_cpus, 0); static BIN_ATTR_RO(package_cpus_list, 0); #ifdef TOPOLOGY_BOOK_SYSFS -define_id_show_func(book_id); +define_id_show_func(book_id, "%d"); static DEVICE_ATTR_RO(book_id); define_siblings_read_func(book_siblings, book_cpumask); static BIN_ATTR_RO(book_siblings, 0); @@ -95,7 +95,7 @@ static BIN_ATTR_RO(book_siblings_list, 0); #endif #ifdef TOPOLOGY_DRAWER_SYSFS -define_id_show_func(drawer_id); +define_id_show_func(drawer_id, "%d"); static 
DEVICE_ATTR_RO(drawer_id); define_siblings_read_func(drawer_siblings, drawer_cpumask); static BIN_ATTR_RO(drawer_siblings, 0); -- GitLab From ab28e944197fa78e6af7c4a0ffd6bba9a5bbacf0 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 31 Jan 2022 15:01:11 -0800 Subject: [PATCH 0129/1586] topology/sysfs: Add PPIN in sysfs under cpu topology PPIN is the Protected Processor Identification Number. This is used to identify the socket as a Field Replaceable Unit (FRU). Existing code only displays this when reporting errors. But this makes it inconvenient for large clusters to use it for its intended purpose of inventory control. Add ppin to /sys/devices/system/cpu/cpu*/topology to make what is already available using RDMSR more easily accessible. Make the file read only for root in case there are still people concerned about making a unique system "serial number" available. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20220131230111.2004669-6-tony.luck@intel.com --- Documentation/ABI/stable/sysfs-devices-system-cpu | 4 ++++ Documentation/ABI/testing/sysfs-devices-system-cpu | 6 ++++++ arch/x86/include/asm/topology.h | 1 + drivers/base/topology.c | 4 ++++ include/linux/topology.h | 3 +++ 5 files changed, 18 insertions(+) diff --git a/Documentation/ABI/stable/sysfs-devices-system-cpu b/Documentation/ABI/stable/sysfs-devices-system-cpu index 3965ce504484a..902392d7eddf0 100644 --- a/Documentation/ABI/stable/sysfs-devices-system-cpu +++ b/Documentation/ABI/stable/sysfs-devices-system-cpu @@ -86,6 +86,10 @@ What: /sys/devices/system/cpu/cpuX/topology/die_cpus Description: internal kernel map of CPUs within the same die. Values: hexadecimal bitmask. +What: /sys/devices/system/cpu/cpuX/topology/ppin +Description: per-socket protected processor inventory number +Values: hexadecimal. + What: /sys/devices/system/cpu/cpuX/topology/die_cpus_list Description: human-readable list of CPUs within the same die. 
The format is like 0-3, 8-11, 14,17. diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 61f5676a7429a..74962c2007902 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -73,6 +73,7 @@ What: /sys/devices/system/cpu/cpuX/topology/core_id /sys/devices/system/cpu/cpuX/topology/physical_package_id /sys/devices/system/cpu/cpuX/topology/thread_siblings /sys/devices/system/cpu/cpuX/topology/thread_siblings_list + /sys/devices/system/cpu/cpuX/topology/ppin Date: December 2008 Contact: Linux kernel mailing list Description: CPU topology files that describe a logical CPU's relationship @@ -103,6 +104,11 @@ Description: CPU topology files that describe a logical CPU's relationship thread_siblings_list: human-readable list of cpuX's hardware threads within the same core as cpuX + ppin: human-readable Protected Processor Identification + Number of the socket the cpu# belongs to. There should be + one per physical_package_id. File is readable only to + admin. + See Documentation/admin-guide/cputopology.rst for more information. 
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 2f0b6be8eaabc..43a89476a5222 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -110,6 +110,7 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu); #define topology_logical_die_id(cpu) (cpu_data(cpu).logical_die_id) #define topology_die_id(cpu) (cpu_data(cpu).cpu_die_id) #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) +#define topology_ppin(cpu) (cpu_data(cpu).ppin) extern unsigned int __max_die_per_package; diff --git a/drivers/base/topology.c b/drivers/base/topology.c index 044f3664f8f22..e9d1efcda89b3 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -58,6 +58,9 @@ static DEVICE_ATTR_RO(cluster_id); define_id_show_func(core_id, "%d"); static DEVICE_ATTR_RO(core_id); +define_id_show_func(ppin, "0x%llx"); +static DEVICE_ATTR_ADMIN_RO(ppin); + define_siblings_read_func(thread_siblings, sibling_cpumask); static BIN_ATTR_RO(thread_siblings, 0); static BIN_ATTR_RO(thread_siblings_list, 0); @@ -145,6 +148,7 @@ static struct attribute *default_attrs[] = { #ifdef TOPOLOGY_DRAWER_SYSFS &dev_attr_drawer_id.attr, #endif + &dev_attr_ppin.attr, NULL }; diff --git a/include/linux/topology.h b/include/linux/topology.h index a6e201758ae9e..f19bc3626297a 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -211,6 +211,9 @@ static inline int cpu_to_mem(int cpu) #ifndef topology_drawer_id #define topology_drawer_id(cpu) ((void)(cpu), -1) #endif +#ifndef topology_ppin +#define topology_ppin(cpu) ((void)(cpu), 0ull) +#endif #ifndef topology_sibling_cpumask #define topology_sibling_cpumask(cpu) cpumask_of(cpu) #endif -- GitLab From 321599693213c81b2ce8530abb27e39528e969f9 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 1 Feb 2022 02:26:54 +0100 Subject: [PATCH 0130/1586] spi: st-ssc4: Covert to use GPIO descriptors This switches the ST SSC SPI controller to use GPIO descriptors from the core instead 
of GPIO numbers. It is already using the core parsing of GPIO numbers so the switch is pretty straight-forward. Cc: Lee Jones Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20220201012654.562578-1-linus.walleij@linaro.org Signed-off-by: Mark Brown --- drivers/spi/spi-st-ssc4.c | 31 ++++--------------------------- 1 file changed, 4 insertions(+), 27 deletions(-) diff --git a/drivers/spi/spi-st-ssc4.c b/drivers/spi/spi-st-ssc4.c index 6c44dda9ee8c5..843be803696bc 100644 --- a/drivers/spi/spi-st-ssc4.c +++ b/drivers/spi/spi-st-ssc4.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -171,11 +170,6 @@ static int spi_st_transfer_one(struct spi_master *master, return t->len; } -static void spi_st_cleanup(struct spi_device *spi) -{ - gpio_free(spi->cs_gpio); -} - /* the spi->mode bits understood by this driver: */ #define MODEBITS (SPI_CPOL | SPI_CPHA | SPI_LSB_FIRST | SPI_LOOP | SPI_CS_HIGH) static int spi_st_setup(struct spi_device *spi) @@ -183,29 +177,17 @@ static int spi_st_setup(struct spi_device *spi) struct spi_st *spi_st = spi_master_get_devdata(spi->master); u32 spi_st_clk, sscbrg, var; u32 hz = spi->max_speed_hz; - int cs = spi->cs_gpio; - int ret; if (!hz) { dev_err(&spi->dev, "max_speed_hz unspecified\n"); return -EINVAL; } - if (!gpio_is_valid(cs)) { - dev_err(&spi->dev, "%d is not a valid gpio\n", cs); + if (!spi->cs_gpiod) { + dev_err(&spi->dev, "no valid gpio assigned\n"); return -EINVAL; } - ret = gpio_request(cs, dev_name(&spi->dev)); - if (ret) { - dev_err(&spi->dev, "could not request gpio:%d\n", cs); - return ret; - } - - ret = gpio_direction_output(cs, spi->mode & SPI_CS_HIGH); - if (ret) - goto out_free_gpio; - spi_st_clk = clk_get_rate(spi_st->clk); /* Set SSC_BRF */ @@ -213,8 +195,7 @@ static int spi_st_setup(struct spi_device *spi) if (sscbrg < 0x07 || sscbrg > BIT(16)) { dev_err(&spi->dev, "baudrate %d outside valid range %d\n", sscbrg, hz); - ret = -EINVAL; - goto out_free_gpio; + return 
-EINVAL; } spi_st->baud = spi_st_clk / (2 * sscbrg); @@ -263,10 +244,6 @@ static int spi_st_setup(struct spi_device *spi) readl_relaxed(spi_st->base + SSC_RBUF); return 0; - -out_free_gpio: - gpio_free(cs); - return ret; } /* Interrupt fired when TX shift register becomes empty */ @@ -309,11 +286,11 @@ static int spi_st_probe(struct platform_device *pdev) master->dev.of_node = np; master->mode_bits = MODEBITS; master->setup = spi_st_setup; - master->cleanup = spi_st_cleanup; master->transfer_one = spi_st_transfer_one; master->bits_per_word_mask = SPI_BPW_MASK(8) | SPI_BPW_MASK(16); master->auto_runtime_pm = true; master->bus_num = pdev->id; + master->use_gpio_descriptors = true; spi_st = spi_master_get_devdata(master); spi_st->clk = devm_clk_get(&pdev->dev, "ssc"); -- GitLab From b651d1da86aa525c5a5b2bd61f528353c28d589d Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 1 Feb 2022 02:29:56 +0100 Subject: [PATCH 0131/1586] spi: bcm2835aux: Convert to use GPIO descriptors This one is pretty straight forward to switch over, the driver already relies on inspecting cs_gpio just check cs_gpiod instead and stop the special handling of requesting the GPIO and stuff the core will take care of. 
Cc: Lukas Wunner Cc: Martin Sperl Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20220201012956.563272-1-linus.walleij@linaro.org Signed-off-by: Mark Brown --- drivers/spi/spi-bcm2835aux.c | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/drivers/spi/spi-bcm2835aux.c b/drivers/spi/spi-bcm2835aux.c index 7d709a8c833bb..e285219223308 100644 --- a/drivers/spi/spi-bcm2835aux.c +++ b/drivers/spi/spi-bcm2835aux.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -445,25 +444,12 @@ static void bcm2835aux_spi_handle_err(struct spi_master *master, static int bcm2835aux_spi_setup(struct spi_device *spi) { - int ret; - /* sanity check for native cs */ if (spi->mode & SPI_NO_CS) return 0; - if (gpio_is_valid(spi->cs_gpio)) { - /* with gpio-cs set the GPIO to the correct level - * and as output (in case the dt has the gpio not configured - * as output but native cs) - */ - ret = gpio_direction_output(spi->cs_gpio, - (spi->mode & SPI_CS_HIGH) ? 0 : 1); - if (ret) - dev_err(&spi->dev, - "could not set gpio %i as output: %i\n", - spi->cs_gpio, ret); - - return ret; - } + + if (spi->cs_gpiod) + return 0; /* for dt-backwards compatibility: only support native on CS0 * known things not supported with broken native CS: @@ -519,6 +505,7 @@ static int bcm2835aux_spi_probe(struct platform_device *pdev) master->prepare_message = bcm2835aux_spi_prepare_message; master->unprepare_message = bcm2835aux_spi_unprepare_message; master->dev.of_node = pdev->dev.of_node; + master->use_gpio_descriptors = true; bs = spi_master_get_devdata(master); -- GitLab From 941bffd7d7f5d6030a54184c5d81b0eb9116ca9a Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Fri, 21 Jan 2022 17:24:23 +0000 Subject: [PATCH 0132/1586] spi: Make spi_alloc_device and spi_add_device public again These functions were previously made private since they were not used. However, these functions will be needed again.
Partial revert of commit da21fde0fdb3 ("spi: Make several public functions private to spi.c") Signed-off-by: Stefan Binding Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20220121172431.6876-2-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 6 ++++-- include/linux/spi/spi.h | 12 ++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 4599b121d7442..1eb84101c4ad8 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -532,7 +532,7 @@ static DEFINE_MUTEX(board_lock); * * Return: a pointer to the new device, or NULL. */ -static struct spi_device *spi_alloc_device(struct spi_controller *ctlr) +struct spi_device *spi_alloc_device(struct spi_controller *ctlr) { struct spi_device *spi; @@ -557,6 +557,7 @@ static struct spi_device *spi_alloc_device(struct spi_controller *ctlr) device_initialize(&spi->dev); return spi; } +EXPORT_SYMBOL_GPL(spi_alloc_device); static void spi_dev_set_name(struct spi_device *spi) { @@ -652,7 +653,7 @@ static int __spi_add_device(struct spi_device *spi) * * Return: 0 on success; negative errno on failure */ -static int spi_add_device(struct spi_device *spi) +int spi_add_device(struct spi_device *spi) { struct spi_controller *ctlr = spi->controller; struct device *dev = ctlr->dev.parent; @@ -673,6 +674,7 @@ static int spi_add_device(struct spi_device *spi) mutex_unlock(&ctlr->add_lock); return status; } +EXPORT_SYMBOL_GPL(spi_add_device); static int spi_add_device_locked(struct spi_device *spi) { diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 7ab3fed7b8043..0346a3ff27fd8 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -1452,7 +1452,19 @@ spi_register_board_info(struct spi_board_info const *info, unsigned n) * use spi_new_device() to describe each device. 
You can also call * spi_unregister_device() to start making that device vanish, but * normally that would be handled by spi_unregister_controller(). + * + * You can also use spi_alloc_device() and spi_add_device() to use a two + * stage registration sequence for each spi_device. This gives the caller + * some more control over the spi_device structure before it is registered, + * but requires that caller to initialize fields that would otherwise + * be defined using the board info. */ +extern struct spi_device * +spi_alloc_device(struct spi_controller *ctlr); + +extern int +spi_add_device(struct spi_device *spi); + extern struct spi_device * spi_new_device(struct spi_controller *, struct spi_board_info *); -- GitLab From 70dd264bc07aee4f89e65138db11e908701388dd Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Fri, 21 Jan 2022 17:24:24 +0000 Subject: [PATCH 0133/1586] spi: Create helper API to lookup ACPI info for spi device This can then be used to find a spi resource inside an ACPI node, and allocate a spi device. Signed-off-by: Stefan Binding Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20220121172431.6876-3-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 46 ++++++++++++++++++++++++++++++++--------- include/linux/spi/spi.h | 6 ++++++ 2 files changed, 42 insertions(+), 10 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 1eb84101c4ad8..13f4701f0694d 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -2410,8 +2410,18 @@ static int acpi_spi_add_resource(struct acpi_resource *ares, void *data) return 1; } -static acpi_status acpi_register_spi_device(struct spi_controller *ctlr, - struct acpi_device *adev) +/** + * acpi_spi_device_alloc - Allocate a spi device, and fill it in with ACPI information + * @ctlr: controller to which the spi device belongs + * @adev: ACPI Device for the spi device + * + * This should be used to allocate a new spi device from and ACPI Node. 
+ * The caller is responsible for calling spi_add_device to register the spi device. + * + * Return: a pointer to the new device, or ERR_PTR on error. + */ +struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, + struct acpi_device *adev) { acpi_handle parent_handle = NULL; struct list_head resource_list; @@ -2419,10 +2429,6 @@ static acpi_status acpi_register_spi_device(struct spi_controller *ctlr, struct spi_device *spi; int ret; - if (acpi_bus_get_status(adev) || !adev->status.present || - acpi_device_enumerated(adev)) - return AE_OK; - lookup.ctlr = ctlr; lookup.irq = -1; @@ -2433,7 +2439,7 @@ static acpi_status acpi_register_spi_device(struct spi_controller *ctlr, if (ret < 0) /* found SPI in _CRS but it points to another controller */ - return AE_OK; + return ERR_PTR(-ENODEV); if (!lookup.max_speed_hz && ACPI_SUCCESS(acpi_get_parent(adev->handle, &parent_handle)) && @@ -2443,16 +2449,15 @@ static acpi_status acpi_register_spi_device(struct spi_controller *ctlr, } if (!lookup.max_speed_hz) - return AE_OK; + return ERR_PTR(-ENODEV); spi = spi_alloc_device(ctlr); if (!spi) { dev_err(&ctlr->dev, "failed to allocate SPI device for %s\n", dev_name(&adev->dev)); - return AE_NO_MEMORY; + return ERR_PTR(-ENOMEM); } - ACPI_COMPANION_SET(&spi->dev, adev); spi->max_speed_hz = lookup.max_speed_hz; spi->mode |= lookup.mode; @@ -2460,6 +2465,27 @@ static acpi_status acpi_register_spi_device(struct spi_controller *ctlr, spi->bits_per_word = lookup.bits_per_word; spi->chip_select = lookup.chip_select; + return spi; +} +EXPORT_SYMBOL_GPL(acpi_spi_device_alloc); + +static acpi_status acpi_register_spi_device(struct spi_controller *ctlr, + struct acpi_device *adev) +{ + struct spi_device *spi; + + if (acpi_bus_get_status(adev) || !adev->status.present || + acpi_device_enumerated(adev)) + return AE_OK; + + spi = acpi_spi_device_alloc(ctlr, adev); + if (IS_ERR(spi)) { + if (PTR_ERR(spi) == -ENOMEM) + return AE_NO_MEMORY; + else + return AE_OK; + } + 
acpi_set_modalias(adev, acpi_device_hid(adev), spi->modalias, sizeof(spi->modalias)); diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 0346a3ff27fd8..d159cef12f1a9 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -16,6 +16,7 @@ #include #include +#include struct dma_chan; struct software_node; @@ -759,6 +760,11 @@ extern int devm_spi_register_controller(struct device *dev, struct spi_controller *ctlr); extern void spi_unregister_controller(struct spi_controller *ctlr); +#if IS_ENABLED(CONFIG_ACPI) +extern struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, + struct acpi_device *adev); +#endif + /* * SPI resource management while processing a SPI message */ -- GitLab From 92640f98a78c6a3ea1ca32143144241eceb129bd Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Fri, 21 Jan 2022 17:24:25 +0000 Subject: [PATCH 0134/1586] spi: Support selection of the index of the ACPI Spi Resource before alloc If a node contains more than one SPI resource it may be necessary to use an index to select which one you want to allocate a spi device for. 
Signed-off-by: Stefan Binding Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20220121172431.6876-4-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 51 +++++++++++++++++++++++++++++++++++------ include/linux/spi/spi.h | 3 ++- 2 files changed, 46 insertions(+), 8 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 13f4701f0694d..06c0a308b38b3 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -2320,6 +2320,8 @@ struct acpi_spi_lookup { int irq; u8 bits_per_word; u8 chip_select; + int n; + int index; }; static void acpi_spi_parse_apple_properties(struct acpi_device *dev, @@ -2351,6 +2353,8 @@ static void acpi_spi_parse_apple_properties(struct acpi_device *dev, lookup->mode |= SPI_CPHA; } +static struct spi_controller *acpi_spi_find_controller_by_adev(struct acpi_device *adev); + static int acpi_spi_add_resource(struct acpi_resource *ares, void *data) { struct acpi_spi_lookup *lookup = data; @@ -2364,14 +2368,35 @@ static int acpi_spi_add_resource(struct acpi_resource *ares, void *data) sb = &ares->data.spi_serial_bus; if (sb->type == ACPI_RESOURCE_SERIAL_TYPE_SPI) { + if (lookup->index != -1 && lookup->n++ != lookup->index) + return 1; + + if (lookup->index == -1 && !ctlr) + return -ENODEV; + status = acpi_get_handle(NULL, sb->resource_source.string_ptr, &parent_handle); - if (ACPI_FAILURE(status) || - ACPI_HANDLE(ctlr->dev.parent) != parent_handle) + if (ACPI_FAILURE(status)) return -ENODEV; + if (ctlr) { + if (ACPI_HANDLE(ctlr->dev.parent) != parent_handle) + return -ENODEV; + } else { + struct acpi_device *adev; + + if (acpi_bus_get_device(parent_handle, &adev)) + return -ENODEV; + + ctlr = acpi_spi_find_controller_by_adev(adev); + if (!ctlr) + return -ENODEV; + + lookup->ctlr = ctlr; + } + /* * ACPI DeviceSelection numbering is handled by the * host controller driver in Windows and can vary @@ -2414,14 +2439,21 @@ static int acpi_spi_add_resource(struct acpi_resource *ares, void *data) * 
acpi_spi_device_alloc - Allocate a spi device, and fill it in with ACPI information * @ctlr: controller to which the spi device belongs * @adev: ACPI Device for the spi device + * @index: Index of the spi resource inside the ACPI Node * * This should be used to allocate a new spi device from and ACPI Node. * The caller is responsible for calling spi_add_device to register the spi device. * + * If ctlr is set to NULL, the Controller for the spi device will be looked up + * using the resource. + * If index is set to -1, index is not used. + * Note: If index is -1, ctlr must be set. + * * Return: a pointer to the new device, or ERR_PTR on error. */ struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, - struct acpi_device *adev) + struct acpi_device *adev, + int index) { acpi_handle parent_handle = NULL; struct list_head resource_list; @@ -2429,8 +2461,13 @@ struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, struct spi_device *spi; int ret; + if (!ctlr && index == -1) + return ERR_PTR(-EINVAL); + lookup.ctlr = ctlr; lookup.irq = -1; + lookup.index = index; + lookup.n = 0; INIT_LIST_HEAD(&resource_list); ret = acpi_dev_get_resources(adev, &resource_list, @@ -2443,7 +2480,7 @@ struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, if (!lookup.max_speed_hz && ACPI_SUCCESS(acpi_get_parent(adev->handle, &parent_handle)) && - ACPI_HANDLE(ctlr->dev.parent) == parent_handle) { + ACPI_HANDLE(lookup.ctlr->dev.parent) == parent_handle) { /* Apple does not use _CRS but nested devices for SPI slaves */ acpi_spi_parse_apple_properties(adev, &lookup); } @@ -2451,9 +2488,9 @@ struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, if (!lookup.max_speed_hz) return ERR_PTR(-ENODEV); - spi = spi_alloc_device(ctlr); + spi = spi_alloc_device(lookup.ctlr); if (!spi) { - dev_err(&ctlr->dev, "failed to allocate SPI device for %s\n", + dev_err(&lookup.ctlr->dev, "failed to allocate SPI device for %s\n", dev_name(&adev->dev)); 
return ERR_PTR(-ENOMEM); } @@ -2478,7 +2515,7 @@ static acpi_status acpi_register_spi_device(struct spi_controller *ctlr, acpi_device_enumerated(adev)) return AE_OK; - spi = acpi_spi_device_alloc(ctlr, adev); + spi = acpi_spi_device_alloc(ctlr, adev, -1); if (IS_ERR(spi)) { if (PTR_ERR(spi) == -ENOMEM) return AE_NO_MEMORY; diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index d159cef12f1a9..e5bbb9cbd3d7a 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -762,7 +762,8 @@ extern void spi_unregister_controller(struct spi_controller *ctlr); #if IS_ENABLED(CONFIG_ACPI) extern struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, - struct acpi_device *adev); + struct acpi_device *adev, + int index); #endif /* -- GitLab From 113962301d2d9a5c11381d9c25ddea7af71be2ff Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Fri, 21 Jan 2022 17:24:26 +0000 Subject: [PATCH 0135/1586] spi: Add API to count spi acpi resources Some ACPI nodes may have more than one Spi Resource. To be able to handle these cases, it's necessary to have a way of counting these resources.
Signed-off-by: Stefan Binding Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20220121172431.6876-5-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 40 ++++++++++++++++++++++++++++++++++++++++ include/linux/spi/spi.h | 1 + 2 files changed, 41 insertions(+) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 06c0a308b38b3..ec9f2ed579e34 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -2324,6 +2324,46 @@ struct acpi_spi_lookup { int index; }; +static int acpi_spi_count(struct acpi_resource *ares, void *data) +{ + struct acpi_resource_spi_serialbus *sb; + int *count = data; + + if (ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS) + return 1; + + sb = &ares->data.spi_serial_bus; + if (sb->type != ACPI_RESOURCE_SERIAL_TYPE_SPI) + return 1; + + *count = *count + 1; + + return 1; +} + +/** + * acpi_spi_count_resources - Count the number of SpiSerialBus resources + * @adev: ACPI device + * + * Returns the number of SpiSerialBus resources in the ACPI-device's + * resource-list; or a negative error code. 
+ */ +int acpi_spi_count_resources(struct acpi_device *adev) +{ + LIST_HEAD(r); + int count = 0; + int ret; + + ret = acpi_dev_get_resources(adev, &r, acpi_spi_count, &count); + if (ret < 0) + return ret; + + acpi_dev_free_resource_list(&r); + + return count; +} +EXPORT_SYMBOL_GPL(acpi_spi_count_resources); + static void acpi_spi_parse_apple_properties(struct acpi_device *dev, struct acpi_spi_lookup *lookup) { diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index e5bbb9cbd3d7a..394b4241d9890 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -764,6 +764,7 @@ extern void spi_unregister_controller(struct spi_controller *ctlr); extern struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, struct acpi_device *adev, int index); +int acpi_spi_count_resources(struct acpi_device *adev); #endif /* -- GitLab From e3dc1399506f894110667ee5c66a6a70f06f3348 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Fri, 21 Jan 2022 17:24:23 +0000 Subject: [PATCH 0136/1586] spi: Make spi_alloc_device and spi_add_device public again These functions were previously made private since they were not used. However, these functions will be needed again. Partial revert of commit da21fde0fdb3 ("spi: Make several public functions private to spi.c") Signed-off-by: Stefan Binding Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20220121172431.6876-2-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 6 ++++-- include/linux/spi/spi.h | 12 ++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 4599b121d7442..1eb84101c4ad8 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -532,7 +532,7 @@ static DEFINE_MUTEX(board_lock); * * Return: a pointer to the new device, or NULL.
*/ -static struct spi_device *spi_alloc_device(struct spi_controller *ctlr) +struct spi_device *spi_alloc_device(struct spi_controller *ctlr) { struct spi_device *spi; @@ -557,6 +557,7 @@ static struct spi_device *spi_alloc_device(struct spi_controller *ctlr) device_initialize(&spi->dev); return spi; } +EXPORT_SYMBOL_GPL(spi_alloc_device); static void spi_dev_set_name(struct spi_device *spi) { @@ -652,7 +653,7 @@ static int __spi_add_device(struct spi_device *spi) * * Return: 0 on success; negative errno on failure */ -static int spi_add_device(struct spi_device *spi) +int spi_add_device(struct spi_device *spi) { struct spi_controller *ctlr = spi->controller; struct device *dev = ctlr->dev.parent; @@ -673,6 +674,7 @@ static int spi_add_device(struct spi_device *spi) mutex_unlock(&ctlr->add_lock); return status; } +EXPORT_SYMBOL_GPL(spi_add_device); static int spi_add_device_locked(struct spi_device *spi) { diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 7ab3fed7b8043..0346a3ff27fd8 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -1452,7 +1452,19 @@ spi_register_board_info(struct spi_board_info const *info, unsigned n) * use spi_new_device() to describe each device. You can also call * spi_unregister_device() to start making that device vanish, but * normally that would be handled by spi_unregister_controller(). + * + * You can also use spi_alloc_device() and spi_add_device() to use a two + * stage registration sequence for each spi_device. This gives the caller + * some more control over the spi_device structure before it is registered, + * but requires that caller to initialize fields that would otherwise + * be defined using the board info. 
*/ +extern struct spi_device * +spi_alloc_device(struct spi_controller *ctlr); + +extern int +spi_add_device(struct spi_device *spi); + extern struct spi_device * spi_new_device(struct spi_controller *, struct spi_board_info *); -- GitLab From 000bee0ed70af79e610444096fb453430220960f Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Fri, 21 Jan 2022 17:24:24 +0000 Subject: [PATCH 0137/1586] spi: Create helper API to lookup ACPI info for spi device This can then be used to find a spi resource inside an ACPI node, and allocate a spi device. Signed-off-by: Stefan Binding Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20220121172431.6876-3-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 46 ++++++++++++++++++++++++++++++++--------- include/linux/spi/spi.h | 6 ++++++ 2 files changed, 42 insertions(+), 10 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 1eb84101c4ad8..13f4701f0694d 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -2410,8 +2410,18 @@ static int acpi_spi_add_resource(struct acpi_resource *ares, void *data) return 1; } -static acpi_status acpi_register_spi_device(struct spi_controller *ctlr, - struct acpi_device *adev) +/** + * acpi_spi_device_alloc - Allocate a spi device, and fill it in with ACPI information + * @ctlr: controller to which the spi device belongs + * @adev: ACPI Device for the spi device + * + * This should be used to allocate a new spi device from and ACPI Node. + * The caller is responsible for calling spi_add_device to register the spi device. + * + * Return: a pointer to the new device, or ERR_PTR on error. 
+ */ +struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, + struct acpi_device *adev) { acpi_handle parent_handle = NULL; struct list_head resource_list; @@ -2419,10 +2429,6 @@ static acpi_status acpi_register_spi_device(struct spi_controller *ctlr, struct spi_device *spi; int ret; - if (acpi_bus_get_status(adev) || !adev->status.present || - acpi_device_enumerated(adev)) - return AE_OK; - lookup.ctlr = ctlr; lookup.irq = -1; @@ -2433,7 +2439,7 @@ static acpi_status acpi_register_spi_device(struct spi_controller *ctlr, if (ret < 0) /* found SPI in _CRS but it points to another controller */ - return AE_OK; + return ERR_PTR(-ENODEV); if (!lookup.max_speed_hz && ACPI_SUCCESS(acpi_get_parent(adev->handle, &parent_handle)) && @@ -2443,16 +2449,15 @@ static acpi_status acpi_register_spi_device(struct spi_controller *ctlr, } if (!lookup.max_speed_hz) - return AE_OK; + return ERR_PTR(-ENODEV); spi = spi_alloc_device(ctlr); if (!spi) { dev_err(&ctlr->dev, "failed to allocate SPI device for %s\n", dev_name(&adev->dev)); - return AE_NO_MEMORY; + return ERR_PTR(-ENOMEM); } - ACPI_COMPANION_SET(&spi->dev, adev); spi->max_speed_hz = lookup.max_speed_hz; spi->mode |= lookup.mode; @@ -2460,6 +2465,27 @@ static acpi_status acpi_register_spi_device(struct spi_controller *ctlr, spi->bits_per_word = lookup.bits_per_word; spi->chip_select = lookup.chip_select; + return spi; +} +EXPORT_SYMBOL_GPL(acpi_spi_device_alloc); + +static acpi_status acpi_register_spi_device(struct spi_controller *ctlr, + struct acpi_device *adev) +{ + struct spi_device *spi; + + if (acpi_bus_get_status(adev) || !adev->status.present || + acpi_device_enumerated(adev)) + return AE_OK; + + spi = acpi_spi_device_alloc(ctlr, adev); + if (IS_ERR(spi)) { + if (PTR_ERR(spi) == -ENOMEM) + return AE_NO_MEMORY; + else + return AE_OK; + } + acpi_set_modalias(adev, acpi_device_hid(adev), spi->modalias, sizeof(spi->modalias)); diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 
0346a3ff27fd8..d159cef12f1a9 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -16,6 +16,7 @@ #include #include +#include struct dma_chan; struct software_node; @@ -759,6 +760,11 @@ extern int devm_spi_register_controller(struct device *dev, struct spi_controller *ctlr); extern void spi_unregister_controller(struct spi_controller *ctlr); +#if IS_ENABLED(CONFIG_ACPI) +extern struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, + struct acpi_device *adev); +#endif + /* * SPI resource management while processing a SPI message */ -- GitLab From 87e59b36e5e26122efd55d77adb9fac827987db0 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Fri, 21 Jan 2022 17:24:25 +0000 Subject: [PATCH 0138/1586] spi: Support selection of the index of the ACPI Spi Resource before alloc If a node contains more than one SPI resource it may be necessary to use an index to select which one you want to allocate a spi device for. Signed-off-by: Stefan Binding Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20220121172431.6876-4-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 51 +++++++++++++++++++++++++++++++++++------ include/linux/spi/spi.h | 3 ++- 2 files changed, 46 insertions(+), 8 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 13f4701f0694d..06c0a308b38b3 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -2320,6 +2320,8 @@ struct acpi_spi_lookup { int irq; u8 bits_per_word; u8 chip_select; + int n; + int index; }; static void acpi_spi_parse_apple_properties(struct acpi_device *dev, @@ -2351,6 +2353,8 @@ static void acpi_spi_parse_apple_properties(struct acpi_device *dev, lookup->mode |= SPI_CPHA; } +static struct spi_controller *acpi_spi_find_controller_by_adev(struct acpi_device *adev); + static int acpi_spi_add_resource(struct acpi_resource *ares, void *data) { struct acpi_spi_lookup *lookup = data; @@ -2364,14 +2368,35 @@ static int acpi_spi_add_resource(struct acpi_resource 
*ares, void *data) sb = &ares->data.spi_serial_bus; if (sb->type == ACPI_RESOURCE_SERIAL_TYPE_SPI) { + if (lookup->index != -1 && lookup->n++ != lookup->index) + return 1; + + if (lookup->index == -1 && !ctlr) + return -ENODEV; + status = acpi_get_handle(NULL, sb->resource_source.string_ptr, &parent_handle); - if (ACPI_FAILURE(status) || - ACPI_HANDLE(ctlr->dev.parent) != parent_handle) + if (ACPI_FAILURE(status)) return -ENODEV; + if (ctlr) { + if (ACPI_HANDLE(ctlr->dev.parent) != parent_handle) + return -ENODEV; + } else { + struct acpi_device *adev; + + if (acpi_bus_get_device(parent_handle, &adev)) + return -ENODEV; + + ctlr = acpi_spi_find_controller_by_adev(adev); + if (!ctlr) + return -ENODEV; + + lookup->ctlr = ctlr; + } + /* * ACPI DeviceSelection numbering is handled by the * host controller driver in Windows and can vary @@ -2414,14 +2439,21 @@ static int acpi_spi_add_resource(struct acpi_resource *ares, void *data) * acpi_spi_device_alloc - Allocate a spi device, and fill it in with ACPI information * @ctlr: controller to which the spi device belongs * @adev: ACPI Device for the spi device + * @index: Index of the spi resource inside the ACPI Node * * This should be used to allocate a new spi device from and ACPI Node. * The caller is responsible for calling spi_add_device to register the spi device. * + * If ctlr is set to NULL, the Controller for the spi device will be looked up + * using the resource. + * If index is set to -1, index is not used. + * Note: If index is -1, ctlr must be set. + * * Return: a pointer to the new device, or ERR_PTR on error. 
*/ struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, - struct acpi_device *adev) + struct acpi_device *adev, + int index) { acpi_handle parent_handle = NULL; struct list_head resource_list; @@ -2429,8 +2461,13 @@ struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, struct spi_device *spi; int ret; + if (!ctlr && index == -1) + return ERR_PTR(-EINVAL); + lookup.ctlr = ctlr; lookup.irq = -1; + lookup.index = index; + lookup.n = 0; INIT_LIST_HEAD(&resource_list); ret = acpi_dev_get_resources(adev, &resource_list, @@ -2443,7 +2480,7 @@ struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, if (!lookup.max_speed_hz && ACPI_SUCCESS(acpi_get_parent(adev->handle, &parent_handle)) && - ACPI_HANDLE(ctlr->dev.parent) == parent_handle) { + ACPI_HANDLE(lookup.ctlr->dev.parent) == parent_handle) { /* Apple does not use _CRS but nested devices for SPI slaves */ acpi_spi_parse_apple_properties(adev, &lookup); } @@ -2451,9 +2488,9 @@ struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, if (!lookup.max_speed_hz) return ERR_PTR(-ENODEV); - spi = spi_alloc_device(ctlr); + spi = spi_alloc_device(lookup.ctlr); if (!spi) { - dev_err(&ctlr->dev, "failed to allocate SPI device for %s\n", + dev_err(&lookup.ctlr->dev, "failed to allocate SPI device for %s\n", dev_name(&adev->dev)); return ERR_PTR(-ENOMEM); } @@ -2478,7 +2515,7 @@ static acpi_status acpi_register_spi_device(struct spi_controller *ctlr, acpi_device_enumerated(adev)) return AE_OK; - spi = acpi_spi_device_alloc(ctlr, adev); + spi = acpi_spi_device_alloc(ctlr, adev, -1); if (IS_ERR(spi)) { if (PTR_ERR(spi) == -ENOMEM) return AE_NO_MEMORY; diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index d159cef12f1a9..e5bbb9cbd3d7a 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -762,7 +762,8 @@ extern void spi_unregister_controller(struct spi_controller *ctlr); #if IS_ENABLED(CONFIG_ACPI) extern struct spi_device 
*acpi_spi_device_alloc(struct spi_controller *ctlr, - struct acpi_device *adev); + struct acpi_device *adev, + int index); #endif /* -- GitLab From e612af7acef2459f1afd885f4107748995a05963 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Fri, 21 Jan 2022 17:24:26 +0000 Subject: [PATCH 0139/1586] spi: Add API to count spi acpi resources Some ACPI nodes may have more than one Spi Resource. To be able to handle these cases, it's necessary to have a way of counting these resources. Signed-off-by: Stefan Binding Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20220121172431.6876-5-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 40 ++++++++++++++++++++++++++++++++++++++++ include/linux/spi/spi.h | 1 + 2 files changed, 41 insertions(+) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 06c0a308b38b3..ec9f2ed579e34 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -2324,6 +2324,46 @@ struct acpi_spi_lookup { int index; }; +static int acpi_spi_count(struct acpi_resource *ares, void *data) +{ + struct acpi_resource_spi_serialbus *sb; + int *count = data; + + if (ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS) + return 1; + + sb = &ares->data.spi_serial_bus; + if (sb->type != ACPI_RESOURCE_SERIAL_TYPE_SPI) + return 1; + + *count = *count + 1; + + return 1; +} + +/** + * acpi_spi_count_resources - Count the number of SpiSerialBus resources + * @adev: ACPI device + * + * Returns the number of SpiSerialBus resources in the ACPI-device's + * resource-list; or a negative error code.
+ */ +int acpi_spi_count_resources(struct acpi_device *adev) +{ + LIST_HEAD(r); + int count = 0; + int ret; + + ret = acpi_dev_get_resources(adev, &r, acpi_spi_count, &count); + if (ret < 0) + return ret; + + acpi_dev_free_resource_list(&r); + + return count; +} +EXPORT_SYMBOL_GPL(acpi_spi_count_resources); + static void acpi_spi_parse_apple_properties(struct acpi_device *dev, struct acpi_spi_lookup *lookup) { diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index e5bbb9cbd3d7a..394b4241d9890 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -764,6 +764,7 @@ extern void spi_unregister_controller(struct spi_controller *ctlr); extern struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, struct acpi_device *adev, int index); +int acpi_spi_count_resources(struct acpi_device *adev); #endif /* -- GitLab From b8b87fd954b4b1bdd2d739c8f50bf685351a1a94 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Mon, 31 Jan 2022 13:57:36 -0500 Subject: [PATCH 0140/1586] selinux: Fix selinux_sb_mnt_opts_compat() selinux_sb_mnt_opts_compat() is called under the sb_lock spinlock and shouldn't be performing any memory allocations. Fix this by parsing the sids at the same time we're chopping up the security mount options string and then using the pre-parsed sids when doing the comparison. 
Fixes: cc274ae7763d ("selinux: fix sleeping function called from invalid context") Fixes: 69c4a42d72eb ("lsm,selinux: add new hook to compare new mount to an existing mount") Signed-off-by: Scott Mayhew Signed-off-by: Paul Moore --- security/selinux/hooks.c | 75 ++++++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 34 deletions(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 9e3658e9e7ca7..85282ccc748f3 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -341,6 +341,10 @@ static void inode_free_security(struct inode *inode) struct selinux_mnt_opts { const char *fscontext, *context, *rootcontext, *defcontext; + u32 fscontext_sid; + u32 context_sid; + u32 rootcontext_sid; + u32 defcontext_sid; }; static void selinux_free_mnt_opts(void *mnt_opts) @@ -597,15 +601,14 @@ static int bad_option(struct superblock_security_struct *sbsec, char flag, return 0; } -static int parse_sid(struct super_block *sb, const char *s, u32 *sid, - gfp_t gfp) +static int parse_sid(struct super_block *sb, const char *s, u32 *sid) { int rc = security_context_str_to_sid(&selinux_state, s, - sid, gfp); + sid, GFP_KERNEL); if (rc) pr_warn("SELinux: security_context_str_to_sid" "(%s) failed for (dev %s, type %s) errno=%d\n", - s, sb->s_id, sb->s_type->name, rc); + s, sb ? sb->s_id : "?", sb ? 
sb->s_type->name : "?", rc); return rc; } @@ -672,8 +675,7 @@ static int selinux_set_mnt_opts(struct super_block *sb, */ if (opts) { if (opts->fscontext) { - rc = parse_sid(sb, opts->fscontext, &fscontext_sid, - GFP_KERNEL); + rc = parse_sid(sb, opts->fscontext, &fscontext_sid); if (rc) goto out; if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, @@ -682,8 +684,7 @@ static int selinux_set_mnt_opts(struct super_block *sb, sbsec->flags |= FSCONTEXT_MNT; } if (opts->context) { - rc = parse_sid(sb, opts->context, &context_sid, - GFP_KERNEL); + rc = parse_sid(sb, opts->context, &context_sid); if (rc) goto out; if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, @@ -692,8 +693,7 @@ static int selinux_set_mnt_opts(struct super_block *sb, sbsec->flags |= CONTEXT_MNT; } if (opts->rootcontext) { - rc = parse_sid(sb, opts->rootcontext, &rootcontext_sid, - GFP_KERNEL); + rc = parse_sid(sb, opts->rootcontext, &rootcontext_sid); if (rc) goto out; if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, @@ -702,8 +702,7 @@ static int selinux_set_mnt_opts(struct super_block *sb, sbsec->flags |= ROOTCONTEXT_MNT; } if (opts->defcontext) { - rc = parse_sid(sb, opts->defcontext, &defcontext_sid, - GFP_KERNEL); + rc = parse_sid(sb, opts->defcontext, &defcontext_sid); if (rc) goto out; if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, @@ -995,21 +994,29 @@ static int selinux_add_opt(int token, const char *s, void **mnt_opts) if (opts->context || opts->defcontext) goto err; opts->context = s; + if (selinux_initialized(&selinux_state)) + parse_sid(NULL, s, &opts->context_sid); break; case Opt_fscontext: if (opts->fscontext) goto err; opts->fscontext = s; + if (selinux_initialized(&selinux_state)) + parse_sid(NULL, s, &opts->fscontext_sid); break; case Opt_rootcontext: if (opts->rootcontext) goto err; opts->rootcontext = s; + if (selinux_initialized(&selinux_state)) + parse_sid(NULL, s, &opts->rootcontext_sid); break; case Opt_defcontext: if (opts->context || opts->defcontext) goto 
err; opts->defcontext = s; + if (selinux_initialized(&selinux_state)) + parse_sid(NULL, s, &opts->defcontext_sid); break; } @@ -2647,8 +2654,6 @@ static int selinux_sb_mnt_opts_compat(struct super_block *sb, void *mnt_opts) { struct selinux_mnt_opts *opts = mnt_opts; struct superblock_security_struct *sbsec = selinux_superblock(sb); - u32 sid; - int rc; /* * Superblock not initialized (i.e. no options) - reject if any @@ -2665,34 +2670,36 @@ static int selinux_sb_mnt_opts_compat(struct super_block *sb, void *mnt_opts) return (sbsec->flags & SE_MNTMASK) ? 1 : 0; if (opts->fscontext) { - rc = parse_sid(sb, opts->fscontext, &sid, GFP_NOWAIT); - if (rc) + if (opts->fscontext_sid == SECSID_NULL) return 1; - if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, sid)) + else if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, + opts->fscontext_sid)) return 1; } if (opts->context) { - rc = parse_sid(sb, opts->context, &sid, GFP_NOWAIT); - if (rc) + if (opts->context_sid == SECSID_NULL) return 1; - if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, sid)) + else if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, + opts->context_sid)) return 1; } if (opts->rootcontext) { - struct inode_security_struct *root_isec; - - root_isec = backing_inode_security(sb->s_root); - rc = parse_sid(sb, opts->rootcontext, &sid, GFP_NOWAIT); - if (rc) - return 1; - if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, sid)) + if (opts->rootcontext_sid == SECSID_NULL) return 1; + else { + struct inode_security_struct *root_isec; + + root_isec = backing_inode_security(sb->s_root); + if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, + opts->rootcontext_sid)) + return 1; + } } if (opts->defcontext) { - rc = parse_sid(sb, opts->defcontext, &sid, GFP_NOWAIT); - if (rc) + if (opts->defcontext_sid == SECSID_NULL) return 1; - if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, sid)) + else if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, + opts->defcontext_sid)) return 1; } return 0; @@ 
-2712,14 +2719,14 @@ static int selinux_sb_remount(struct super_block *sb, void *mnt_opts) return 0; if (opts->fscontext) { - rc = parse_sid(sb, opts->fscontext, &sid, GFP_KERNEL); + rc = parse_sid(sb, opts->fscontext, &sid); if (rc) return rc; if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, sid)) goto out_bad_option; } if (opts->context) { - rc = parse_sid(sb, opts->context, &sid, GFP_KERNEL); + rc = parse_sid(sb, opts->context, &sid); if (rc) return rc; if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, sid)) @@ -2728,14 +2735,14 @@ static int selinux_sb_remount(struct super_block *sb, void *mnt_opts) if (opts->rootcontext) { struct inode_security_struct *root_isec; root_isec = backing_inode_security(sb->s_root); - rc = parse_sid(sb, opts->rootcontext, &sid, GFP_KERNEL); + rc = parse_sid(sb, opts->rootcontext, &sid); if (rc) return rc; if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, sid)) goto out_bad_option; } if (opts->defcontext) { - rc = parse_sid(sb, opts->defcontext, &sid, GFP_KERNEL); + rc = parse_sid(sb, opts->defcontext, &sid); if (rc) return rc; if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, sid)) -- GitLab From 6bc1968c14e91e03c0851b9c5c5330d91305a853 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Mon, 31 Jan 2022 13:57:37 -0500 Subject: [PATCH 0141/1586] selinux: try to use preparsed sid before calling parse_sid() Avoid unnecessary parsing of sids that have already been parsed via selinux_sb_eat_lsm_opts(). 
Signed-off-by: Scott Mayhew Signed-off-by: Paul Moore --- security/selinux/hooks.c | 88 +++++++++++++++++++++++++++------------- 1 file changed, 59 insertions(+), 29 deletions(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 85282ccc748f3..b60481192b380 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -675,36 +675,48 @@ static int selinux_set_mnt_opts(struct super_block *sb, */ if (opts) { if (opts->fscontext) { - rc = parse_sid(sb, opts->fscontext, &fscontext_sid); - if (rc) - goto out; + if (opts->fscontext_sid == SECSID_NULL) { + rc = parse_sid(sb, opts->fscontext, &fscontext_sid); + if (rc) + goto out; + } else + fscontext_sid = opts->fscontext_sid; if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, fscontext_sid)) goto out_double_mount; sbsec->flags |= FSCONTEXT_MNT; } if (opts->context) { - rc = parse_sid(sb, opts->context, &context_sid); - if (rc) - goto out; + if (opts->context_sid == SECSID_NULL) { + rc = parse_sid(sb, opts->context, &context_sid); + if (rc) + goto out; + } else + context_sid = opts->context_sid; if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, context_sid)) goto out_double_mount; sbsec->flags |= CONTEXT_MNT; } if (opts->rootcontext) { - rc = parse_sid(sb, opts->rootcontext, &rootcontext_sid); - if (rc) - goto out; + if (opts->rootcontext_sid == SECSID_NULL) { + rc = parse_sid(sb, opts->rootcontext, &rootcontext_sid); + if (rc) + goto out; + } else + rootcontext_sid = opts->rootcontext_sid; if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, rootcontext_sid)) goto out_double_mount; sbsec->flags |= ROOTCONTEXT_MNT; } if (opts->defcontext) { - rc = parse_sid(sb, opts->defcontext, &defcontext_sid); - if (rc) - goto out; + if (opts->defcontext_sid == SECSID_NULL) { + rc = parse_sid(sb, opts->defcontext, &defcontext_sid); + if (rc) + goto out; + } else + defcontext_sid = opts->defcontext_sid; if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, defcontext_sid)) goto out_double_mount; @@ 
-2709,7 +2721,6 @@ static int selinux_sb_remount(struct super_block *sb, void *mnt_opts) { struct selinux_mnt_opts *opts = mnt_opts; struct superblock_security_struct *sbsec = selinux_superblock(sb); - u32 sid; int rc; if (!(sbsec->flags & SE_SBINITIALIZED)) @@ -2719,33 +2730,48 @@ static int selinux_sb_remount(struct super_block *sb, void *mnt_opts) return 0; if (opts->fscontext) { - rc = parse_sid(sb, opts->fscontext, &sid); - if (rc) - return rc; - if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, sid)) + if (opts->fscontext_sid == SECSID_NULL) { + rc = parse_sid(sb, opts->fscontext, + &opts->fscontext_sid); + if (rc) + return rc; + } + if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, + opts->fscontext_sid)) goto out_bad_option; } if (opts->context) { - rc = parse_sid(sb, opts->context, &sid); - if (rc) - return rc; - if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, sid)) + if (opts->context_sid == SECSID_NULL) { + rc = parse_sid(sb, opts->context, &opts->context_sid); + if (rc) + return rc; + } + if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, + opts->context_sid)) goto out_bad_option; } if (opts->rootcontext) { struct inode_security_struct *root_isec; root_isec = backing_inode_security(sb->s_root); - rc = parse_sid(sb, opts->rootcontext, &sid); - if (rc) - return rc; - if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, sid)) + if (opts->rootcontext_sid == SECSID_NULL) { + rc = parse_sid(sb, opts->rootcontext, + &opts->rootcontext_sid); + if (rc) + return rc; + } + if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, + opts->rootcontext_sid)) goto out_bad_option; } if (opts->defcontext) { - rc = parse_sid(sb, opts->defcontext, &sid); - if (rc) - return rc; - if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, sid)) + if (opts->defcontext_sid == SECSID_NULL) { + rc = parse_sid(sb, opts->defcontext, + &opts->defcontext_sid); + if (rc) + return rc; + } + if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, + opts->defcontext_sid)) goto 
out_bad_option; } return 0; @@ -2843,6 +2869,10 @@ static int selinux_fs_context_dup(struct fs_context *fc, if (!opts->defcontext) return -ENOMEM; } + opts->fscontext_sid = src->fscontext_sid; + opts->context_sid = src->context_sid; + opts->rootcontext_sid = src->rootcontext_sid; + opts->defcontext_sid = src->defcontext_sid; return 0; } -- GitLab From dc6cdced47ce1edbca79bdc5a3cd4e435b5358dd Mon Sep 17 00:00:00 2001 From: Tang Yizhou Date: Sat, 25 Dec 2021 09:51:46 +0800 Subject: [PATCH 0142/1586] docs/zh_CN: Add rbtree Chinese translation Translate core-api/rbtree.rst into Chinese. Signed-off-by: Tang Yizhou Reviewed-by: Alex Shi Link: https://lore.kernel.org/r/20211225015146.1535-1-tangyizhou@huawei.com Signed-off-by: Jonathan Corbet --- .../translations/zh_CN/core-api/index.rst | 2 +- .../translations/zh_CN/core-api/rbtree.rst | 391 ++++++++++++++++++ 2 files changed, 392 insertions(+), 1 deletion(-) create mode 100644 Documentation/translations/zh_CN/core-api/rbtree.rst diff --git a/Documentation/translations/zh_CN/core-api/index.rst b/Documentation/translations/zh_CN/core-api/index.rst index d10191c45cf15..26d9913fc8b60 100644 --- a/Documentation/translations/zh_CN/core-api/index.rst +++ b/Documentation/translations/zh_CN/core-api/index.rst @@ -42,6 +42,7 @@ kref assoc_array xarray + rbtree Todolist: @@ -49,7 +50,6 @@ Todolist: idr circular-buffers - rbtree generic-radix-tree packing bus-virt-phys-mapping diff --git a/Documentation/translations/zh_CN/core-api/rbtree.rst b/Documentation/translations/zh_CN/core-api/rbtree.rst new file mode 100644 index 0000000000000..a3e1555cb974d --- /dev/null +++ b/Documentation/translations/zh_CN/core-api/rbtree.rst @@ -0,0 +1,391 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/core-api/rbtree.rst + +:翻译: + + 唐艺舟 Tang Yizhou + +========================= +Linux中的红黑树(rbtree) +========================= + + +:日期: 2007年1月18日 +:作者: Rob Landley + +何为红黑树,它们有什么用? 
+-------------------------- + +红黑树是一种自平衡二叉搜索树,被用来存储可排序的键/值数据对。这与基数树(被用来高效 +存储稀疏数组,因此使用长整型下标来插入/访问/删除结点)和哈希表(没有保持排序因而无法 +容易地按序遍历,同时必须调节其大小和哈希函数,然而红黑树可以优雅地伸缩以便存储任意 +数量的键)不同。 + +红黑树和AVL树类似,但在插入和删除时提供了更快的实时有界的最坏情况性能(分别最多两次 +旋转和三次旋转,来平衡树),查询时间轻微变慢(但时间复杂度仍然是O(log n))。 + +引用Linux每周新闻(Linux Weekly News): + + 内核中有多处红黑树的使用案例。最后期限调度器和完全公平排队(CFQ)I/O调度器利用 + 红黑树跟踪请求;数据包CD/DVD驱动程序也是如此。高精度时钟代码使用一颗红黑树组织 + 未完成的定时器请求。ext3文件系统用红黑树跟踪目录项。虚拟内存区域(VMAs)、epoll + 文件描述符、密码学密钥和在“分层令牌桶”调度器中的网络数据包都由红黑树跟踪。 + +本文档涵盖了对Linux红黑树实现的使用方法。更多关于红黑树的性质和实现的信息,参见: + + Linux每周新闻关于红黑树的文章 + https://lwn.net/Articles/184495/ + + 维基百科红黑树词条 + https://en.wikipedia.org/wiki/Red-black_tree + +红黑树的Linux实现 +----------------- + +Linux的红黑树实现在文件“lib/rbtree.c”中。要使用它,需要“#include <linux/rbtree.h>”。 + +Linux的红黑树实现对速度进行了优化,因此比传统的实现少一个间接层(有更好的缓存局部性)。 +每个rb_node结构体的实例嵌入在它管理的数据结构中,因此不需要靠指针来分离rb_node和它 +管理的数据结构。用户应该编写他们自己的树搜索和插入函数,来调用已提供的红黑树函数, +而不是使用一个比较回调函数指针。加锁代码也留给红黑树的用户编写。 + +创建一颗红黑树 +-------------- + +红黑树中的数据结点是包含rb_node结构体成员的结构体:: + + struct mytype { + struct rb_node node; + char *keystring; + }; + +当处理一个指向内嵌rb_node结构体的指针时,包住rb_node的结构体可用标准的container_of() +宏访问。此外,个体成员可直接用rb_entry(node, type, member)访问。 + +每颗红黑树的根是一个rb_root数据结构,它由以下方式初始化为空: + + struct rb_root mytree = RB_ROOT; + +在一颗红黑树中搜索值 +-------------------- + +为你的树写一个搜索函数是相当简单的:从树根开始,比较每个值,然后根据需要继续前往左边或 +右边的分支。 + +示例:: + + struct mytype *my_search(struct rb_root *root, char *string) + { + struct rb_node *node = root->rb_node; + + while (node) { + struct mytype *data = container_of(node, struct mytype, node); + int result; + + result = strcmp(string, data->keystring); + + if (result < 0) + node = node->rb_left; + else if (result > 0) + node = node->rb_right; + else + return data; + } + return NULL; + } + +在一颗红黑树中插入数据 +---------------------- + +在树中插入数据的步骤包括:首先搜索插入新结点的位置,然后插入结点并对树再平衡 +("recoloring")。 + +插入的搜索和上文的搜索不同,它要找到嫁接新结点的位置。新结点也需要一个指向它的父节点 +的链接,以达到再平衡的目的。 + +示例:: + + int my_insert(struct rb_root *root, struct mytype *data) + { + struct rb_node **new = &(root->rb_node), *parent =
NULL; + + /* Figure out where to put new node */ + while (*new) { + struct mytype *this = container_of(*new, struct mytype, node); + int result = strcmp(data->keystring, this->keystring); + + parent = *new; + if (result < 0) + new = &((*new)->rb_left); + else if (result > 0) + new = &((*new)->rb_right); + else + return FALSE; + } + + /* Add new node and rebalance tree. */ + rb_link_node(&data->node, parent, new); + rb_insert_color(&data->node, root); + + return TRUE; + } + +在一颗红黑树中删除或替换已经存在的数据 +-------------------------------------- + +若要从树中删除一个已经存在的结点,调用:: + + void rb_erase(struct rb_node *victim, struct rb_root *tree); + +示例:: + + struct mytype *data = mysearch(&mytree, "walrus"); + + if (data) { + rb_erase(&data->node, &mytree); + myfree(data); + } + +若要用一个新结点替换树中一个已经存在的键值相同的结点,调用:: + + void rb_replace_node(struct rb_node *old, struct rb_node *new, + struct rb_root *tree); + +通过这种方式替换结点不会对树做重排序:如果新结点的键值和旧结点不同,红黑树可能被 +破坏。 + +(按排序的顺序)遍历存储在红黑树中的元素 +---------------------------------------- + +我们提供了四个函数,用于以排序的方式遍历一颗红黑树的内容。这些函数可以在任意红黑树 +上工作,并且不需要被修改或包装(除非加锁的目的):: + + struct rb_node *rb_first(struct rb_root *tree); + struct rb_node *rb_last(struct rb_root *tree); + struct rb_node *rb_next(struct rb_node *node); + struct rb_node *rb_prev(struct rb_node *node); + +要开始迭代,需要使用一个指向树根的指针调用rb_first()或rb_last(),它将返回一个指向 +树中第一个或最后一个元素所包含的节点结构的指针。要继续的话,可以在当前结点上调用 +rb_next()或rb_prev()来获取下一个或上一个结点。当没有剩余的结点时,将返回NULL。 + +迭代器函数返回一个指向被嵌入的rb_node结构体的指针,由此,包住rb_node的结构体可用 +标准的container_of()宏访问。此外,个体成员可直接用rb_entry(node, type, member) +访问。 + +示例:: + + struct rb_node *node; + for (node = rb_first(&mytree); node; node = rb_next(node)) + printk("key=%s\n", rb_entry(node, struct mytype, node)->keystring); + +带缓存的红黑树 +-------------- + +计算最左边(最小的)结点是二叉搜索树的一个相当常见的任务,例如用于遍历,或用户根据 +他们自己的逻辑依赖一个特定的顺序。为此,用户可以使用'struct rb_root_cached'来优化 +时间复杂度为O(logN)的rb_first()的调用,以简单地获取指针,避免了潜在的昂贵的树迭代。 +维护操作的额外运行时间开销可忽略,不过内存占用较大。 + +和rb_root结构体类似,带缓存的红黑树由以下方式初始化为空:: + + struct rb_root_cached mytree = 
RB_ROOT_CACHED; + +带缓存的红黑树只是一个常规的rb_root,加上一个额外的指针来缓存最左边的节点。这使得 +rb_root_cached可以存在于rb_root存在的任何地方,并且只需增加几个接口来支持带缓存的 +树:: + + struct rb_node *rb_first_cached(struct rb_root_cached *tree); + void rb_insert_color_cached(struct rb_node *, struct rb_root_cached *, bool); + void rb_erase_cached(struct rb_node *node, struct rb_root_cached *); + +操作和删除也有对应的带缓存的树的调用:: + + void rb_insert_augmented_cached(struct rb_node *node, struct rb_root_cached *, + bool, struct rb_augment_callbacks *); + void rb_erase_augmented_cached(struct rb_node *, struct rb_root_cached *, + struct rb_augment_callbacks *); + + +对增强型红黑树的支持 +-------------------- + +增强型红黑树是一种在每个结点里存储了“一些”附加数据的红黑树,其中结点N的附加数据 +必须是以N为根的子树中所有结点的内容的函数。它是建立在红黑树基础设施之上的可选特性。 +想要使用这个特性的红黑树用户,插入和删除结点时必须调用增强型接口并提供增强型回调函数。 + +实现增强型红黑树操作的C文件必须包含<linux/rbtree_augmented.h>而不是<linux/rbtree.h>。 +注意,linux/rbtree_augmented.h暴露了一些红黑树实现的细节而你不应依赖它们,请坚持 +使用文档记录的API,并且不要在头文件中包含<linux/rbtree_augmented.h>,以最小化你的 +用户意外地依赖这些实现细节的可能。 + +插入时,用户必须更新通往被插入节点的路径上的增强信息,然后像往常一样调用rb_link_node(), +然后是rb_augment_inserted()而不是平时的rb_insert_color()调用。如果 +rb_augment_inserted()再平衡了红黑树,它将回调至一个用户提供的函数来更新受影响的 +子树上的增强信息。 + +删除一个结点时,用户必须调用rb_erase_augmented()而不是rb_erase()。 +rb_erase_augmented()回调至一个用户提供的函数来更新受影响的子树上的增强信息。 + +在两种情况下,回调都是通过rb_augment_callbacks结构体提供的。必须定义3个回调: + +- 一个传播回调,它更新一个给定结点和它的祖先们的增强数据,直到一个给定的停止点 + (如果是NULL,将更新一路更新到树根)。 + +- 一个复制回调,它将一颗给定子树的增强数据复制到一个新指定的子树树根。 + +- 一个树旋转回调,它将一颗给定的子树的增强值复制到新指定的子树树根上,并重新计算 + 先前的子树树根的增强值。 + +rb_erase_augmented()编译后的代码可能会内联传播、复制回调,这将导致函数体积更大, +因此每个增强型红黑树的用户应该只有一个rb_erase_augmented()的调用点,以限制编译后 +的代码大小。 + + +使用示例 +^^^^^^^^ + +区间树是增强型红黑树的一个例子。参考Cormen,Leiserson,Rivest和Stein写的 +《算法导论》。区间树的更多细节: + +经典的红黑树只有一个键,它不能直接用来存储像[lo:hi]这样的区间范围,也不能快速查找 +与新的lo:hi重叠的部分,或者查找是否有与新的lo:hi完全匹配的部分。 + +然而,红黑树可以被增强,以一种结构化的方式来存储这种区间范围,从而使高效的查找和 +精确匹配成为可能。 + +这个存储在每个节点中的“额外信息”是其所有后代结点中的最大hi(max_hi)值。这个信息 +可以保持在每个结点上,只需查看一下该结点和它的直系子结点们。这将被用于时间复杂度 +为O(log n)的最低匹配查找(所有可能的匹配中最低的起始地址),就像这样:: + + struct interval_tree_node * + interval_tree_first_match(struct rb_root *root, + unsigned long start, unsigned long last) +
{ + struct interval_tree_node *node; + + if (!root->rb_node) + return NULL; + node = rb_entry(root->rb_node, struct interval_tree_node, rb); + + while (true) { + if (node->rb.rb_left) { + struct interval_tree_node *left = + rb_entry(node->rb.rb_left, + struct interval_tree_node, rb); + if (left->__subtree_last >= start) { + /* + * Some nodes in left subtree satisfy Cond2. + * Iterate to find the leftmost such node N. + * If it also satisfies Cond1, that's the match + * we are looking for. Otherwise, there is no + * matching interval as nodes to the right of N + * can't satisfy Cond1 either. + */ + node = left; + continue; + } + } + if (node->start <= last) { /* Cond1 */ + if (node->last >= start) /* Cond2 */ + return node; /* node is leftmost match */ + if (node->rb.rb_right) { + node = rb_entry(node->rb.rb_right, + struct interval_tree_node, rb); + if (node->__subtree_last >= start) + continue; + } + } + return NULL; /* No match */ + } + } + +插入/删除是通过以下增强型回调来定义的:: + + static inline unsigned long + compute_subtree_last(struct interval_tree_node *node) + { + unsigned long max = node->last, subtree_last; + if (node->rb.rb_left) { + subtree_last = rb_entry(node->rb.rb_left, + struct interval_tree_node, rb)->__subtree_last; + if (max < subtree_last) + max = subtree_last; + } + if (node->rb.rb_right) { + subtree_last = rb_entry(node->rb.rb_right, + struct interval_tree_node, rb)->__subtree_last; + if (max < subtree_last) + max = subtree_last; + } + return max; + } + + static void augment_propagate(struct rb_node *rb, struct rb_node *stop) + { + while (rb != stop) { + struct interval_tree_node *node = + rb_entry(rb, struct interval_tree_node, rb); + unsigned long subtree_last = compute_subtree_last(node); + if (node->__subtree_last == subtree_last) + break; + node->__subtree_last = subtree_last; + rb = rb_parent(&node->rb); + } + } + + static void augment_copy(struct rb_node *rb_old, struct rb_node *rb_new) + { + struct interval_tree_node *old = + rb_entry(rb_old, struct 
interval_tree_node, rb); + struct interval_tree_node *new = + rb_entry(rb_new, struct interval_tree_node, rb); + + new->__subtree_last = old->__subtree_last; + } + + static void augment_rotate(struct rb_node *rb_old, struct rb_node *rb_new) + { + struct interval_tree_node *old = + rb_entry(rb_old, struct interval_tree_node, rb); + struct interval_tree_node *new = + rb_entry(rb_new, struct interval_tree_node, rb); + + new->__subtree_last = old->__subtree_last; + old->__subtree_last = compute_subtree_last(old); + } + + static const struct rb_augment_callbacks augment_callbacks = { + augment_propagate, augment_copy, augment_rotate + }; + + void interval_tree_insert(struct interval_tree_node *node, + struct rb_root *root) + { + struct rb_node **link = &root->rb_node, *rb_parent = NULL; + unsigned long start = node->start, last = node->last; + struct interval_tree_node *parent; + + while (*link) { + rb_parent = *link; + parent = rb_entry(rb_parent, struct interval_tree_node, rb); + if (parent->__subtree_last < last) + parent->__subtree_last = last; + if (start < parent->start) + link = &parent->rb.rb_left; + else + link = &parent->rb.rb_right; + } + + node->__subtree_last = last; + rb_link_node(&node->rb, rb_parent, link); + rb_insert_augmented(&node->rb, root, &augment_callbacks); + } + + void interval_tree_remove(struct interval_tree_node *node, + struct rb_root *root) + { + rb_erase_augmented(&node->rb, root, &augment_callbacks); + } -- GitLab From 5dbbc145d24ac68fc481fc6fe0e4ec3d7a4b8007 Mon Sep 17 00:00:00 2001 From: Tang Yizhou Date: Tue, 21 Dec 2021 11:18:18 +0800 Subject: [PATCH 0143/1586] docs: scheduler: Fix outdated parameter of rebalance_domains According to the function prototype of rebalance_domains(), its first parameter is *rq* and the document needs to be updated.
Signed-off-by: Tang Yizhou Link: https://lore.kernel.org/r/20211221031818.23186-1-tangyizhou@huawei.com Signed-off-by: Jonathan Corbet --- Documentation/scheduler/sched-domains.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/scheduler/sched-domains.rst b/Documentation/scheduler/sched-domains.rst index 84dcdcd2911c6..e57ad28301bde 100644 --- a/Documentation/scheduler/sched-domains.rst +++ b/Documentation/scheduler/sched-domains.rst @@ -37,10 +37,10 @@ rebalancing event for the current runqueue has arrived. The actual load balancing workhorse, run_rebalance_domains()->rebalance_domains(), is then run in softirq context (SCHED_SOFTIRQ). -The latter function takes two arguments: the current CPU and whether it was idle -at the time the scheduler_tick() happened and iterates over all sched domains -our CPU is on, starting from its base domain and going up the ->parent chain. -While doing that, it checks to see if the current domain has exhausted its +The latter function takes two arguments: the runqueue of current CPU and whether +the CPU was idle at the time the scheduler_tick() happened and iterates over all +sched domains our CPU is on, starting from its base domain and going up the ->parent +chain. While doing that, it checks to see if the current domain has exhausted its rebalance interval. If so, it runs load_balance() on that domain. It then checks the parent sched_domain (if it exists), and the parent of the parent and so forth. -- GitLab From 0e326df069802dc48e4f095f889cb780e4beaba6 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 27 Jan 2022 10:56:13 -0500 Subject: [PATCH 0144/1586] selinux: various sparse fixes When running the SELinux code through sparse, there are a handful of warnings. This patch resolves some of these warnings caused by "__rcu" mismatches. 
% make W=1 C=1 security/selinux/ Signed-off-by: Paul Moore --- security/selinux/hooks.c | 6 +++--- security/selinux/ibpkey.c | 2 +- security/selinux/netnode.c | 5 +++-- security/selinux/netport.c | 2 +- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index b60481192b380..b24a1aeeedd43 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2553,7 +2553,7 @@ static void selinux_bprm_committed_creds(struct linux_binprm *bprm) if (rc) { clear_itimer(); - spin_lock_irq(&current->sighand->siglock); + spin_lock_irq(&unrcu_pointer(current->sighand)->siglock); if (!fatal_signal_pending(current)) { flush_sigqueue(&current->pending); flush_sigqueue(&current->signal->shared_pending); @@ -2561,13 +2561,13 @@ static void selinux_bprm_committed_creds(struct linux_binprm *bprm) sigemptyset(&current->blocked); recalc_sigpending(); } - spin_unlock_irq(&current->sighand->siglock); + spin_unlock_irq(&unrcu_pointer(current->sighand)->siglock); } /* Wake up the parent if it is waiting so that it can recheck * wait permission to the new task SID.
*/ read_lock(&tasklist_lock); - __wake_up_parent(current, current->real_parent); + __wake_up_parent(current, unrcu_pointer(current->real_parent)); read_unlock(&tasklist_lock); } diff --git a/security/selinux/ibpkey.c b/security/selinux/ibpkey.c index 20b3b22438205..5839ca7bb9c75 100644 --- a/security/selinux/ibpkey.c +++ b/security/selinux/ibpkey.c @@ -104,7 +104,7 @@ static void sel_ib_pkey_insert(struct sel_ib_pkey *pkey) tail = list_entry( rcu_dereference_protected( - sel_ib_pkey_hash[idx].list.prev, + list_tail_rcu(&sel_ib_pkey_hash[idx].list), lockdep_is_held(&sel_ib_pkey_lock)), struct sel_ib_pkey, list); list_del_rcu(&tail->list); diff --git a/security/selinux/netnode.c b/security/selinux/netnode.c index 889552db0d31a..0ac7df9a93677 100644 --- a/security/selinux/netnode.c +++ b/security/selinux/netnode.c @@ -164,8 +164,9 @@ static void sel_netnode_insert(struct sel_netnode *node) if (sel_netnode_hash[idx].size == SEL_NETNODE_HASH_BKT_LIMIT) { struct sel_netnode *tail; tail = list_entry( - rcu_dereference_protected(sel_netnode_hash[idx].list.prev, - lockdep_is_held(&sel_netnode_lock)), + rcu_dereference_protected( + list_tail_rcu(&sel_netnode_hash[idx].list), + lockdep_is_held(&sel_netnode_lock)), struct sel_netnode, list); list_del_rcu(&tail->list); kfree_rcu(tail, rcu); diff --git a/security/selinux/netport.c b/security/selinux/netport.c index 9ba09d11c0f5b..8eec6347cf012 100644 --- a/security/selinux/netport.c +++ b/security/selinux/netport.c @@ -113,7 +113,7 @@ static void sel_netport_insert(struct sel_netport *port) struct sel_netport *tail; tail = list_entry( rcu_dereference_protected( - sel_netport_hash[idx].list.prev, + list_tail_rcu(&sel_netport_hash[idx].list), lockdep_is_held(&sel_netport_lock)), struct sel_netport, list); list_del_rcu(&tail->list); -- GitLab From 4d266c247d56751c2c97e0c411212b59e90922fc Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Mon, 13 Dec 2021 11:40:24 +0530 Subject: [PATCH 0145/1586] rcu/exp: Fix check for idle 
context in rcu_exp_handler For PREEMPT_RCU, the rcu_exp_handler() function checks whether the current CPU is in idle, by calling rcu_dynticks_curr_cpu_in_eqs(). However, rcu_exp_handler() is called in IPI handler context. So, it should be checking the idle context using rcu_is_cpu_rrupt_from_idle(). Fix this by using rcu_is_cpu_rrupt_from_idle() instead of rcu_dynticks_curr_cpu_in_eqs(). Non-preempt configuration already uses the correct check. Reviewed-by: Frederic Weisbecker Signed-off-by: Neeraj Upadhyay Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_exp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 237a79989abae..1568c8ef185b2 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -656,7 +656,7 @@ static void rcu_exp_handler(void *unused) */ if (!depth) { if (!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK)) || - rcu_dynticks_curr_cpu_in_eqs()) { + rcu_is_cpu_rrupt_from_idle()) { rcu_report_exp_rdp(rdp); } else { WRITE_ONCE(rdp->cpu_no_qs.b.exp, true); -- GitLab From 63c564da11cbed96ec6cf0b5faf6af0b7e3624d2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 13 Dec 2021 21:00:02 -0800 Subject: [PATCH 0146/1586] rcu: Mark ->expmask access in synchronize_rcu_expedited_wait() This commit adds a READ_ONCE() to an access to the rcu_node structure's ->expmask field to prevent compiler mischief. Detected by KCSAN. Signed-off-by: Paul E. 
McKenney --- kernel/rcu/tree_exp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 1568c8ef185b2..60197ea24ceb9 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -502,7 +502,8 @@ static void synchronize_rcu_expedited_wait(void) if (synchronize_rcu_expedited_wait_once(1)) return; rcu_for_each_leaf_node(rnp) { - for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) { + mask = READ_ONCE(rnp->expmask); + for_each_leaf_node_cpu_mask(rnp, cpu, mask) { rdp = per_cpu_ptr(&rcu_data, cpu); if (rdp->rcu_forced_tick_exp) continue; -- GitLab From a47f9f131dfe4f765e385fa90e13032eadb00bac Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 13 Dec 2021 11:05:07 -0800 Subject: [PATCH 0147/1586] rcu: Mark accesses to boost_starttime The boost_starttime shared variable has conflicting unmarked C-language accesses, which are dangerous at best. This commit therefore adds appropriate marking. This was found by KCSAN. Signed-off-by: Paul E. McKenney --- kernel/rcu/rcutorture.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 422f7e4cc08de..829ae0b7d3c04 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -997,7 +997,7 @@ static int rcu_torture_boost(void *arg) goto checkwait; /* Wait for the next test interval. */ - oldstarttime = boost_starttime; + oldstarttime = READ_ONCE(boost_starttime); while (time_before(jiffies, oldstarttime)) { schedule_timeout_interruptible(oldstarttime - jiffies); if (stutter_wait("rcu_torture_boost")) @@ -1041,10 +1041,11 @@ static int rcu_torture_boost(void *arg) * interval. Besides, we are running at RT priority, * so delays should be relatively rare. 
*/ - while (oldstarttime == boost_starttime && !kthread_should_stop()) { + while (oldstarttime == READ_ONCE(boost_starttime) && !kthread_should_stop()) { if (mutex_trylock(&boost_mutex)) { if (oldstarttime == boost_starttime) { - boost_starttime = jiffies + test_boost_interval * HZ; + WRITE_ONCE(boost_starttime, + jiffies + test_boost_interval * HZ); n_rcu_torture_boosts++; } mutex_unlock(&boost_mutex); -- GitLab From 02e3024175274ed4bf7912e7a1281b300cec76b5 Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Sat, 11 Dec 2021 22:31:39 +0530 Subject: [PATCH 0148/1586] rcu/nocb: Handle concurrent nocb kthreads creation When multiple CPUs in the same nocb gp/cb group concurrently come online, they might try to concurrently create the same rcuog kthread. Fix this by using nocb gp CPU's spawn mutex to provide mutual exclusion for the rcuog kthread creation code. [ paulmck: Whitespace fixes per kernel test robot feedback. ] Acked-by: David Woodhouse Signed-off-by: Neeraj Upadhyay Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.h | 2 ++ kernel/rcu/tree_nocb.h | 7 ++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 486fc901bd085..24dd4b0d805f1 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -203,6 +203,8 @@ struct rcu_data { int nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */ struct timer_list nocb_timer; /* Enforce finite deferral. */ unsigned long nocb_gp_adv_time; /* Last call_rcu() CB adv (jiffies). */ + struct mutex nocb_gp_kthread_mutex; /* Exclusion for nocb gp kthread */ + /* spawning */ /* The following fields are used by call_rcu, hence own cacheline. 
*/ raw_spinlock_t nocb_bypass_lock ____cacheline_internodealigned_in_smp; diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index eeafb546a7a09..1e40519d1a05a 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -1226,6 +1226,7 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) raw_spin_lock_init(&rdp->nocb_gp_lock); timer_setup(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer, 0); rcu_cblist_init(&rdp->nocb_bypass); + mutex_init(&rdp->nocb_gp_kthread_mutex); } /* @@ -1248,13 +1249,17 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu) /* If we didn't spawn the GP kthread first, reorganize! */ rdp_gp = rdp->nocb_gp_rdp; + mutex_lock(&rdp_gp->nocb_gp_kthread_mutex); if (!rdp_gp->nocb_gp_kthread) { t = kthread_run(rcu_nocb_gp_kthread, rdp_gp, "rcuog/%d", rdp_gp->cpu); - if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo GP kthread, OOM is now expected behavior\n", __func__)) + if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo GP kthread, OOM is now expected behavior\n", __func__)) { + mutex_unlock(&rdp_gp->nocb_gp_kthread_mutex); return; + } WRITE_ONCE(rdp_gp->nocb_gp_kthread, t); } + mutex_unlock(&rdp_gp->nocb_gp_kthread_mutex); /* Spawn the kthread for this CPU. */ t = kthread_run(rcu_nocb_cb_kthread, rdp, -- GitLab From eae9f147a4b02e132187a2d88a403b9ccc28212a Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Mon, 13 Dec 2021 12:32:09 +0530 Subject: [PATCH 0149/1586] rcu: Remove unused rcu_state.boost Signed-off-by: Neeraj Upadhyay Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.h | 5 ++--- kernel/rcu/tree_plugin.h | 2 -- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 24dd4b0d805f1..e9990945483f1 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -304,9 +304,8 @@ struct rcu_state { /* The following fields are guarded by the root rcu_node's lock. */ - u8 boost ____cacheline_internodealigned_in_smp; - /* Subject to priority boost. 
*/ - unsigned long gp_seq; /* Grace-period sequence #. */ + unsigned long gp_seq ____cacheline_internodealigned_in_smp; + /* Grace-period sequence #. */ unsigned long gp_max; /* Maximum GP duration in */ /* jiffies. */ struct task_struct *gp_kthread; /* Task for grace periods. */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index c5b45c2f68a15..109429e70a642 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1175,8 +1175,6 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp) if (rnp->boost_kthread_task || !rcu_scheduler_fully_active) return; - rcu_state.boost = 1; - t = kthread_create(rcu_boost_kthread, (void *)rnp, "rcub/%d", rnp_index); if (WARN_ON_ONCE(IS_ERR(t))) -- GitLab From 4b4399b2450de38916718ba9947e6cdb69c99c55 Mon Sep 17 00:00:00 2001 From: Zqiang Date: Wed, 29 Dec 2021 00:05:10 +0800 Subject: [PATCH 0150/1586] rcu: Create per-cpu rcuc kthreads only when rcutree.use_softirq=0 The per-CPU "rcuc" kthreads are used only by kernels booted with rcutree.use_softirq=0, but they are nevertheless unconditionally created by kernels built with CONFIG_RCU_BOOST=y. This results in "rcuc" kthreads being created that are never actually used. This commit therefore refrains from creating these kthreads unless the kernel is actually booted with rcutree.use_softirq=0. Acked-by: Sebastian Andrzej Siewior Signed-off-by: Zqiang Signed-off-by: Paul E. 
McKenney --- kernel/rcu/tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index a4c25a6283b0b..4e5e37e5ee3c9 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2894,7 +2894,7 @@ static int __init rcu_spawn_core_kthreads(void) for_each_possible_cpu(cpu) per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0; - if (!IS_ENABLED(CONFIG_RCU_BOOST) && use_softirq) + if (use_softirq) return 0; WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcuc kthread, OOM is now expected behavior\n", __func__); -- GitLab From c8db27dd0ea8071d2ea29a1a401c4ccc611ec6c1 Mon Sep 17 00:00:00 2001 From: Alison Chaiken Date: Tue, 11 Jan 2022 15:32:50 -0800 Subject: [PATCH 0151/1586] rcu: Move kthread_prio bounds-check to a separate function Move the bounds-check of the kthread_prio cmdline parameter to a new function in order to facilitate a different callsite. Signed-off-by: Alison Chaiken Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 4e5e37e5ee3c9..5bf0312f66760 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -4440,26 +4440,10 @@ static int rcu_pm_notify(struct notifier_block *self, static int __init rcu_spawn_gp_kthread(void) { unsigned long flags; - int kthread_prio_in = kthread_prio; struct rcu_node *rnp; struct sched_param sp; struct task_struct *t; - /* Force priority into range.
*/ - if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 2 - && IS_BUILTIN(CONFIG_RCU_TORTURE_TEST)) - kthread_prio = 2; - else if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 1) - kthread_prio = 1; - else if (kthread_prio < 0) - kthread_prio = 0; - else if (kthread_prio > 99) - kthread_prio = 99; - - if (kthread_prio != kthread_prio_in) - pr_alert("rcu_spawn_gp_kthread(): Limited prio to %d from %d\n", - kthread_prio, kthread_prio_in); - rcu_scheduler_fully_active = 1; t = kthread_create(rcu_gp_kthread, NULL, "%s", rcu_state.name); if (WARN_ONCE(IS_ERR(t), "%s: Could not start grace-period kthread, OOM is now expected behavior\n", __func__)) @@ -4584,6 +4568,28 @@ static void __init rcu_init_one(void) } } +/* + * Force priority from the kernel command-line into range. + */ +static void __init sanitize_kthread_prio(void) +{ + int kthread_prio_in = kthread_prio; + + if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 2 + && IS_BUILTIN(CONFIG_RCU_TORTURE_TEST)) + kthread_prio = 2; + else if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 1) + kthread_prio = 1; + else if (kthread_prio < 0) + kthread_prio = 0; + else if (kthread_prio > 99) + kthread_prio = 99; + + if (kthread_prio != kthread_prio_in) + pr_alert("%s: Limited prio to %d from %d\n", + __func__, kthread_prio, kthread_prio_in); +} + /* * Compute the rcu_node tree geometry from kernel parameters. This cannot * replace the definitions in tree.h because those are needed to size @@ -4744,6 +4750,7 @@ void __init rcu_init(void) kfree_rcu_batch_init(); rcu_bootup_announce(); + sanitize_kthread_prio(); rcu_init_geometry(); rcu_init_one(); if (dump_tree) -- GitLab From 54577e23fa0791599db1a3d86fc8e7a205d3da75 Mon Sep 17 00:00:00 2001 From: Alison Chaiken Date: Tue, 11 Jan 2022 15:32:51 -0800 Subject: [PATCH 0152/1586] rcu: Make priority of grace-period thread consistent The priority of RCU grace period threads is set to kthread_prio when they are launched from rcu_spawn_gp_kthread(). 
The same is not true of rcu_spawn_one_nocb_kthread(). Accordingly, add priority elevation to rcu_spawn_one_nocb_kthread(). Signed-off-by: Alison Chaiken Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_nocb.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index 1e40519d1a05a..ea889cbfc3b95 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -1239,6 +1239,7 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu) struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); struct rcu_data *rdp_gp; struct task_struct *t; + struct sched_param sp; if (!rcu_scheduler_fully_active || !rcu_nocb_is_setup) return; @@ -1248,6 +1249,7 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu) return; /* If we didn't spawn the GP kthread first, reorganize! */ + sp.sched_priority = kthread_prio; rdp_gp = rdp->nocb_gp_rdp; mutex_lock(&rdp_gp->nocb_gp_kthread_mutex); if (!rdp_gp->nocb_gp_kthread) { @@ -1258,6 +1260,8 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu) return; } WRITE_ONCE(rdp_gp->nocb_gp_kthread, t); + if (kthread_prio) + sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); } mutex_unlock(&rdp_gp->nocb_gp_kthread_mutex); -- GitLab From c8b16a65267e35ecc5621dbc81cbe7e5b0992fce Mon Sep 17 00:00:00 2001 From: Alison Chaiken Date: Tue, 11 Jan 2022 15:32:52 -0800 Subject: [PATCH 0153/1586] rcu: Elevate priority of offloaded callback threads When CONFIG_PREEMPT_RT=y, the rcutree.kthread_prio command-line parameter signals initialization code to boost the priority of rcuc callbacks to the designated value. With the additional CONFIG_RCU_NOCB_CPU=y configuration and an additional rcu_nocbs command-line parameter, the callbacks on the listed cores are offloaded to new rcuop kthreads that are not pinned to the cores whose post-grace-period work is performed. While the rcuop kthreads perform the same function as the rcuc kthreads they offload, the kthread_prio parameter only boosts the priority of the rcuc kthreads. 
Fix this inconsistency by elevating rcuop kthreads to the same priority as the rcuc kthreads. Signed-off-by: Alison Chaiken Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 2 +- kernel/rcu/tree_nocb.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 5bf0312f66760..9e4c5b281f003 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -153,7 +153,7 @@ static void sync_sched_exp_online_cleanup(int cpu); static void check_cb_ovld_locked(struct rcu_data *rdp, struct rcu_node *rnp); static bool rcu_rdp_is_offloaded(struct rcu_data *rdp); -/* rcuc/rcub kthread realtime priority */ +/* rcuc/rcub/rcuop kthread realtime priority */ static int kthread_prio = IS_ENABLED(CONFIG_RCU_BOOST) ? 1 : 0; module_param(kthread_prio, int, 0444); diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index ea889cbfc3b95..547c41437c767 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -1270,6 +1270,9 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu) "rcuo%c/%d", rcu_state.abbr, cpu); if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo CB kthread, OOM is now expected behavior\n", __func__)) return; + + if (kthread_prio) + sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); WRITE_ONCE(rdp->nocb_cb_kthread, t); WRITE_ONCE(rdp->nocb_gp_kthread, rdp_gp->nocb_gp_kthread); } -- GitLab From a469948b20ed9898f61e9906fd9b135ad853bb94 Mon Sep 17 00:00:00 2001 From: Alison Chaiken Date: Tue, 11 Jan 2022 15:32:53 -0800 Subject: [PATCH 0154/1586] rcu: Update documentation regarding kthread_prio cmdline parameter Inform readers that the priority of RCU no-callback threads will also be boosted. Signed-off-by: Alison Chaiken Signed-off-by: Paul E. 
McKenney --- Documentation/admin-guide/kernel-parameters.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index f5a27f067db9e..8e2e65122b993 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4504,6 +4504,8 @@ (the least-favored priority). Otherwise, when RCU_BOOST is not set, valid values are 0-99 and the default is zero (non-realtime operation). + When RCU_NOCB_CPU is set, also adjust the + priority of NOCB callback kthreads. rcutree.rcu_nocb_gp_stride= [KNL] Set the number of NOCB callback kthreads in -- GitLab From 10c535787436d62ea28156a4b91365fd89b5a432 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 21 Jan 2022 12:40:08 -0800 Subject: [PATCH 0155/1586] rcu: Don't deboost before reporting expedited quiescent state Currently rcu_preempt_deferred_qs_irqrestore() releases rnp->boost_mtx before reporting the expedited quiescent state. Under heavy real-time load, this can result in this function being preempted before the quiescent state is reported, which can in turn prevent the expedited grace period from completing. Tim Murray reports that the resulting expedited grace periods can take hundreds of milliseconds and even more than one second, when they should normally complete in less than a millisecond. This was fine given that there were no particular response-time constraints for synchronize_rcu_expedited(), as it was designed for throughput rather than latency. However, some users now need sub-100-millisecond response-time constraints. This patch therefore follows Neeraj's suggestion (seconded by Tim and by Uladzislau Rezki) of simply reversing the two operations. 
Reported-by: Tim Murray Reported-by: Joel Fernandes Reported-by: Neeraj Upadhyay Reviewed-by: Neeraj Upadhyay Reviewed-by: Uladzislau Rezki (Sony) Tested-by: Tim Murray Cc: Todd Kjos Cc: Sandeep Patil Cc: # 5.4.x Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 109429e70a642..02ac057ba3f83 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -556,16 +556,16 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags) raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } - /* Unboost if we were boosted. */ - if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex) - rt_mutex_futex_unlock(&rnp->boost_mtx.rtmutex); - /* * If this was the last task on the expedited lists, * then we need to report up the rcu_node hierarchy. */ if (!empty_exp && empty_exp_now) rcu_report_exp_rnp(rnp, true); + + /* Unboost if we were boosted. */ + if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex) + rt_mutex_futex_unlock(&rnp->boost_mtx.rtmutex); } else { local_irq_restore(flags); } -- GitLab From c9515875850fefcc79492c5189fe8431e75ddec5 Mon Sep 17 00:00:00 2001 From: Zqiang Date: Tue, 25 Jan 2022 10:47:44 +0800 Subject: [PATCH 0156/1586] rcu: Add per-CPU rcuc task dumps to RCU CPU stall warnings When the rcutree.use_softirq kernel boot parameter is set to zero, all RCU_SOFTIRQ processing is carried out by the per-CPU rcuc kthreads. If these kthreads are being starved, quiescent states will not be reported, which in turn means that the grace period will not end, which can in turn trigger RCU CPU stall warnings. This commit therefore dumps stack traces of stalled CPUs' rcuc kthreads, which can help identify what is preventing those kthreads from running. Suggested-by: Ammar Faizi Reviewed-by: Ammar Faizi Signed-off-by: Zqiang Signed-off-by: Paul E. 
McKenney --- kernel/rcu/tree.c | 3 +++ kernel/rcu/tree.h | 1 + kernel/rcu/tree_plugin.h | 3 +++ kernel/rcu/tree_stall.h | 35 +++++++++++++++++++++++++++++++++++ 4 files changed, 42 insertions(+) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 9e4c5b281f003..bd9b2af247abd 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2850,10 +2850,12 @@ static void rcu_cpu_kthread(unsigned int cpu) { unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status); char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work); + unsigned long *j = this_cpu_ptr(&rcu_data.rcuc_activity); int spincnt; trace_rcu_utilization(TPS("Start CPU kthread@rcu_run")); for (spincnt = 0; spincnt < 10; spincnt++) { + WRITE_ONCE(*j, jiffies); local_bh_disable(); *statusp = RCU_KTHREAD_RUNNING; local_irq_disable(); @@ -2874,6 +2876,7 @@ static void rcu_cpu_kthread(unsigned int cpu) schedule_timeout_idle(2); trace_rcu_utilization(TPS("End CPU kthread@rcu_yield")); *statusp = RCU_KTHREAD_WAITING; + WRITE_ONCE(*j, jiffies); } static struct smp_hotplug_thread rcu_cpu_thread_spec = { diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index e9990945483f1..b84cc5742c317 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -239,6 +239,7 @@ struct rcu_data { /* rcuc per-CPU kthread or NULL. */ unsigned int rcu_cpu_kthread_status; char rcu_cpu_has_work; + unsigned long rcuc_activity; /* 7) Diagnostic data, including RCU CPU stall warnings. */ unsigned int softirq_snap; /* Snapshot of softirq activity. 
*/ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 02ac057ba3f83..8167cab1bffc8 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -996,12 +996,15 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck) */ static void rcu_cpu_kthread_setup(unsigned int cpu) { + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); #ifdef CONFIG_RCU_BOOST struct sched_param sp; sp.sched_priority = kthread_prio; sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); #endif /* #ifdef CONFIG_RCU_BOOST */ + + WRITE_ONCE(rdp->rcuc_activity, jiffies); } #ifdef CONFIG_RCU_BOOST diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 21bebf7c9030b..0c5d8516516af 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -379,6 +379,15 @@ static bool rcu_is_gp_kthread_starving(unsigned long *jp) return j > 2 * HZ; } +static bool rcu_is_rcuc_kthread_starving(struct rcu_data *rdp, unsigned long *jp) +{ + unsigned long j = jiffies - READ_ONCE(rdp->rcuc_activity); + + if (jp) + *jp = j; + return j > 2 * HZ; +} + /* * Print out diagnostic information for the specified stalled CPU. * @@ -430,6 +439,29 @@ static void print_cpu_stall_info(int cpu) falsepositive ? " (false positive?)" : ""); } +static void rcuc_kthread_dump(struct rcu_data *rdp) +{ + int cpu; + unsigned long j; + struct task_struct *rcuc; + + rcuc = rdp->rcu_cpu_kthread_task; + if (!rcuc) + return; + + cpu = task_cpu(rcuc); + if (cpu_is_offline(cpu) || idle_cpu(cpu)) + return; + + if (!rcu_is_rcuc_kthread_starving(rdp, &j)) + return; + + pr_err("%s kthread starved for %ld jiffies\n", rcuc->comm, j); + sched_show_task(rcuc); + if (!trigger_single_cpu_backtrace(cpu)) + dump_cpu_task(cpu); +} + /* Complain about starvation of grace-period kthread. 
*/ static void rcu_check_gp_kthread_starvation(void) { @@ -601,6 +633,9 @@ static void print_cpu_stall(unsigned long gps) rcu_check_gp_kthread_expired_fqs_timer(); rcu_check_gp_kthread_starvation(); + if (!use_softirq) + rcuc_kthread_dump(rdp); + rcu_dump_cpu_stacks(); raw_spin_lock_irqsave_rcu_node(rnp, flags); -- GitLab From 6f81bd6a4e305d15d9c2a6a350e2876a7a814d7e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 6 Dec 2021 15:12:14 -0800 Subject: [PATCH 0157/1586] rcutorture: Print message before invoking ->cb_barrier() The various ->cb_barrier() functions, for example, rcu_barrier(), sometimes cause rcutorture hangs. But currently, the last console message is the unenlightening "Stopping rcu_torture_stats". This commit therefore prints a message of the form "rcu_torture_cleanup: Invoking rcu_barrier+0x0/0x1e0()" to help point people in the right direction. Signed-off-by: Paul E. McKenney --- kernel/rcu/rcutorture.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 422f7e4cc08de..00400aef58184 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -2905,8 +2905,10 @@ rcu_torture_cleanup(void) int i; if (torture_cleanup_begin()) { - if (cur_ops->cb_barrier != NULL) + if (cur_ops->cb_barrier != NULL) { + pr_info("%s: Invoking %pS().\n", __func__, cur_ops->cb_barrier); cur_ops->cb_barrier(); + } return; } if (!cur_ops) { @@ -2961,8 +2963,10 @@ rcu_torture_cleanup(void) * Wait for all RCU callbacks to fire, then do torture-type-specific * cleanup operations. */ - if (cur_ops->cb_barrier != NULL) + if (cur_ops->cb_barrier != NULL) { + pr_info("%s: Invoking %pS().\n", __func__, cur_ops->cb_barrier); cur_ops->cb_barrier(); + } if (cur_ops->cleanup != NULL) cur_ops->cleanup(); -- GitLab From 2b4a7f20f160e6440848c62a70ee5dc5237a2c8b Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Thu, 16 Dec 2021 12:23:31 -0800 Subject: [PATCH 0158/1586] torture: Distinguish kthread stopping and being asked to stop Right now, if a given kthread (call it "kthread") realizes that it needs to stop, "Stopping kthread" is written to the console. When the cleanup code decides that it is time to stop that kthread, "Stopping kthread tasks" is written to the console. These two events might happen in either order, especially in the case of time-based torture-test shutdown. But it is hard to distinguish these, especially for those unfamiliar with the torture tests. This commit therefore changes the first case from "Stopping kthread" to "kthread is stopping" to make things more clear. Signed-off-by: Paul E. McKenney --- kernel/torture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/torture.c b/kernel/torture.c index ef27a6c824514..f55d803f995d4 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -911,7 +911,7 @@ void torture_kthread_stopping(char *title) { char buf[128]; - snprintf(buf, sizeof(buf), "Stopping %s", title); + snprintf(buf, sizeof(buf), "%s is stopping", title); VERBOSE_TOROUT_STRING(buf); while (!kthread_should_stop()) { torture_shutdown_absorb(title); -- GitLab From 05b724655bf1908abf531dd0bce455e55703a3a8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 16 Dec 2021 15:36:02 -0800 Subject: [PATCH 0159/1586] rcutorture: Increase visibility of forward-progress hangs This commit adds a few pr_alert() calls to rcutorture's forward-progress testing in order to better diagnose shutdown-time hangs. Signed-off-by: Paul E. 
McKenney --- kernel/rcu/rcutorture.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 00400aef58184..fefc3fa1a9c2a 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -2281,6 +2281,7 @@ static void rcu_torture_fwd_prog_nr(struct rcu_fwd *rfp, unsigned long stopat; static DEFINE_TORTURE_RANDOM(trs); + pr_alert("%s: Starting forward-progress test %d\n", __func__, rfp->rcu_fwd_id); if (!cur_ops->sync) return; // Cannot do need_resched() forward progress testing without ->sync. if (cur_ops->call && cur_ops->cb_barrier) { @@ -2325,6 +2326,7 @@ static void rcu_torture_fwd_prog_nr(struct rcu_fwd *rfp, if (selfpropcb) { WRITE_ONCE(fcs.stop, 1); cur_ops->sync(); /* Wait for running CB to complete. */ + pr_alert("%s: Waiting for CBs: %pS() %d\n", __func__, cur_ops->cb_barrier, rfp->rcu_fwd_id); cur_ops->cb_barrier(); /* Wait for queued callbacks. */ } @@ -2353,6 +2355,7 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp) unsigned long stopat; unsigned long stoppedat; + pr_alert("%s: Starting forward-progress test %d\n", __func__, rfp->rcu_fwd_id); if (READ_ONCE(rcu_fwd_emergency_stop)) return; /* Get out of the way quickly, no GP wait! */ if (!cur_ops->call) @@ -2414,6 +2417,7 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp) n_launders_cb_snap = READ_ONCE(rfp->n_launders_cb); cver = READ_ONCE(rcu_torture_current_version) - cver; gps = rcutorture_seq_diff(cur_ops->get_gp_seq(), gps); + pr_alert("%s: Waiting for CBs: %pS() %d\n", __func__, cur_ops->cb_barrier, rfp->rcu_fwd_id); cur_ops->cb_barrier(); /* Wait for callbacks to be invoked. */ (void)rcu_torture_fwd_prog_cbfree(rfp); -- GitLab From e22ef8df415d924428e35c9c112526306e684adc Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Fri, 17 Dec 2021 12:33:53 -0800 Subject: [PATCH 0160/1586] rcutorture: Make rcu_fwd_cb_nodelay be a counter Back when only one rcutorture kthread could do forward-progress testing, it was just fine for rcu_fwd_cb_nodelay to be a non-atomic bool. It was set at the start of forward-progress testing and cleared at the end. But now that there are multiple threads, the value can be cleared while one of the threads is still doing forward-progress testing. This commit therefore makes rcu_fwd_cb_nodelay be an atomic counter, replacing the WRITE_ONCE() operations with atomic_inc() and atomic_dec(). Signed-off-by: Paul E. McKenney --- kernel/rcu/rcutorture.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index fefc3fa1a9c2a..afe95c6948957 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -284,7 +284,7 @@ static atomic_t barrier_cbs_invoked; /* Barrier callbacks invoked. */ static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */ static DECLARE_WAIT_QUEUE_HEAD(barrier_wq); -static bool rcu_fwd_cb_nodelay; /* Short rcu_torture_delay() delays. */ +static atomic_t rcu_fwd_cb_nodelay; /* Short rcu_torture_delay() delays. */ /* * Allocate an element from the rcu_tortures pool. @@ -387,7 +387,7 @@ rcu_read_delay(struct torture_random_state *rrsp, struct rt_read_seg *rtrsp) * period, and we want a long delay occasionally to trigger * force_quiescent_state. 
*/ - if (!READ_ONCE(rcu_fwd_cb_nodelay) && + if (!atomic_read(&rcu_fwd_cb_nodelay) && !(torture_random(rrsp) % (nrealreaders * 2000 * longdelay_ms))) { started = cur_ops->get_gp_seq(); ts = rcu_trace_clock_local(); @@ -1276,7 +1276,7 @@ rcu_torture_writer(void *arg) boot_ended = rcu_inkernel_boot_has_ended(); stutter_waited = stutter_wait("rcu_torture_writer"); if (stutter_waited && - !READ_ONCE(rcu_fwd_cb_nodelay) && + !atomic_read(&rcu_fwd_cb_nodelay) && !cur_ops->slow_gps && !torture_must_stop() && boot_ended) @@ -2290,7 +2290,7 @@ static void rcu_torture_fwd_prog_nr(struct rcu_fwd *rfp, } /* Tight loop containing cond_resched(). */ - WRITE_ONCE(rcu_fwd_cb_nodelay, true); + atomic_inc(&rcu_fwd_cb_nodelay); cur_ops->sync(); /* Later readers see above write. */ if (selfpropcb) { WRITE_ONCE(fcs.stop, 0); @@ -2335,7 +2335,7 @@ static void rcu_torture_fwd_prog_nr(struct rcu_fwd *rfp, destroy_rcu_head_on_stack(&fcs.rh); } schedule_timeout_uninterruptible(HZ / 10); /* Let kthreads recover. */ - WRITE_ONCE(rcu_fwd_cb_nodelay, false); + atomic_dec(&rcu_fwd_cb_nodelay); } /* Carry out call_rcu() forward-progress testing. */ @@ -2362,7 +2362,7 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp) return; /* Can't do call_rcu() fwd prog without ->call. */ /* Loop continuously posting RCU callbacks. */ - WRITE_ONCE(rcu_fwd_cb_nodelay, true); + atomic_inc(&rcu_fwd_cb_nodelay); cur_ops->sync(); /* Later readers see above write. */ WRITE_ONCE(rfp->rcu_fwd_startat, jiffies); stopat = rfp->rcu_fwd_startat + MAX_FWD_CB_JIFFIES; @@ -2435,7 +2435,7 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp) } schedule_timeout_uninterruptible(HZ); /* Let CBs drain. */ tick_dep_clear_task(current, TICK_DEP_BIT_RCU); - WRITE_ONCE(rcu_fwd_cb_nodelay, false); + atomic_dec(&rcu_fwd_cb_nodelay); } -- GitLab From 02b51a1cf47977d8772c7dcc363ef6a1e6e59f21 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Fri, 17 Dec 2021 15:05:05 -0800 Subject: [PATCH 0161/1586] rcutorture: Add end-of-test check to rcu_torture_fwd_prog() loop The second and subsequent forward-progress kthreads loop waiting for the first forward-progress kthread to start the next test interval. Unfortunately, if the test ends while one of those kthreads is waiting, the test will hang. This hang occurs because that wait loop fails to check for the end of the test. This commit therefore adds an end-of-test check to that wait loop. Signed-off-by: Paul E. McKenney --- kernel/rcu/rcutorture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index afe95c6948957..e99658efd97f4 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -2515,7 +2515,7 @@ static int rcu_torture_fwd_prog(void *args) firsttime = false; WRITE_ONCE(rcu_fwd_seq, rcu_fwd_seq + 1); } else { - while (READ_ONCE(rcu_fwd_seq) == oldseq) + while (READ_ONCE(rcu_fwd_seq) == oldseq && !torture_must_stop()) schedule_timeout_interruptible(1); oldseq = READ_ONCE(rcu_fwd_seq); } -- GitLab From 89440d2dad0cc2a781290470cb90402ebba481fc Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 28 Dec 2021 15:59:38 -0800 Subject: [PATCH 0162/1586] rcutorture: Fix rcu_fwd_mutex deadlock The rcu_torture_fwd_cb_hist() function acquires rcu_fwd_mutex, but is invoked from the rcutorture_oom_notify() function, which holds this same mutex across this call. This commit fixes the resulting deadlock. Reported-by: kernel test robot Tested-by: Oliver Sang Signed-off-by: Paul E. 
McKenney --- kernel/rcu/rcutorture.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index e99658efd97f4..1c8f40b90f70b 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -2180,7 +2180,6 @@ static void rcu_torture_fwd_cb_hist(struct rcu_fwd *rfp) for (i = ARRAY_SIZE(rfp->n_launders_hist) - 1; i > 0; i--) if (rfp->n_launders_hist[i].n_launders > 0) break; - mutex_lock(&rcu_fwd_mutex); // Serialize histograms. pr_alert("%s: Callback-invocation histogram %d (duration %lu jiffies):", __func__, rfp->rcu_fwd_id, jiffies - rfp->rcu_fwd_startat); gps_old = rfp->rcu_launder_gp_seq_start; @@ -2193,7 +2192,6 @@ static void rcu_torture_fwd_cb_hist(struct rcu_fwd *rfp) gps_old = gps; } pr_cont("\n"); - mutex_unlock(&rcu_fwd_mutex); } /* Callback function for continuous-flood RCU callbacks. */ @@ -2431,7 +2429,9 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp) n_launders, n_launders_sa, n_max_gps, n_max_cbs, cver, gps); atomic_long_add(n_max_cbs, &rcu_fwd_max_cbs); + mutex_lock(&rcu_fwd_mutex); // Serialize histograms. rcu_torture_fwd_cb_hist(rfp); + mutex_unlock(&rcu_fwd_mutex); } schedule_timeout_uninterruptible(HZ); /* Let CBs drain. */ tick_dep_clear_task(current, TICK_DEP_BIT_RCU); -- GitLab From 6b8646a9d37c6324cf994dbefb75f3eb20b109ab Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 3 Jan 2022 06:07:09 -0800 Subject: [PATCH 0163/1586] torture: Wake up kthreads after storing task_struct pointer Currently, _torture_create_kthread() uses kthread_run() to create torture-test kthreads, which means that the resulting task_struct pointer is stored after the newly created kthread has been marked runnable. This in turn can cause spurious failure of checks for code being run by a particular kthread. This commit therefore changes _torture_create_kthread() to use kthread_create(), then to do an explicit wake_up_process() after the task_struct pointer has been stored. 
Reported-by: Frederic Weisbecker Reviewed-by: Neeraj Upadhyay Reviewed-by: Uladzislau Rezki (Sony) Reviewed-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- kernel/torture.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/torture.c b/kernel/torture.c index f55d803f995d4..789aeb0e1159c 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -931,12 +931,14 @@ int _torture_create_kthread(int (*fn)(void *arg), void *arg, char *s, char *m, int ret = 0; VERBOSE_TOROUT_STRING(m); - *tp = kthread_run(fn, arg, "%s", s); + *tp = kthread_create(fn, arg, "%s", s); if (IS_ERR(*tp)) { ret = PTR_ERR(*tp); TOROUT_ERRSTRING(f); *tp = NULL; + return ret; } + wake_up_process(*tp); // Process is sleeping, so ordering provided. torture_shuffle_task_register(*tp); return ret; } -- GitLab From 9c0f1c7fd7c6e1e0f5b84e20c577fbab62563d03 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 27 Jan 2022 20:29:10 -0800 Subject: [PATCH 0164/1586] rcutorture: Enable limited callback-flooding tests of SRCU This commit allows up to 50,000 callbacks worth of callback-flooding tests of SRCU. The goal of this change is to exercise Tree SRCU's ability to transition from SRCU_SIZE_SMALL to SRCU_SIZE_BIG triggered by callback-queue-time lock contention. Signed-off-by: Paul E. 
McKenney --- kernel/rcu/rcutorture.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 1c8f40b90f70b..b41db719085ef 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -674,6 +674,7 @@ static struct rcu_torture_ops srcu_ops = { .call = srcu_torture_call, .cb_barrier = srcu_torture_barrier, .stats = srcu_torture_stats, + .cbflood_max = 50000, .irq_capable = 1, .no_pi_lock = IS_ENABLED(CONFIG_TINY_SRCU), .name = "srcu" @@ -708,6 +709,7 @@ static struct rcu_torture_ops srcud_ops = { .call = srcu_torture_call, .cb_barrier = srcu_torture_barrier, .stats = srcu_torture_stats, + .cbflood_max = 50000, .irq_capable = 1, .no_pi_lock = IS_ENABLED(CONFIG_TINY_SRCU), .name = "srcud" -- GitLab From bee6f2169935658fc405128a8b763ea49f50985c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 1 Dec 2021 19:19:13 -0800 Subject: [PATCH 0165/1586] torture: Drop trailing ^M from console output Console logs can sometimes have trailing control-M characters, which the forward-progress evaluation code in kvm-recheck-rcu.sh passes through to the user output. Which does not cause a technical problem, but which can look ugly. This commit therefore strips the control-M characters. Signed-off-by: Paul E. 
McKenney --- tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh index 1c4c2c727dad1..43e1387234d11 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh @@ -25,7 +25,7 @@ stopstate="`grep 'End-test grace-period state: g' $i/console.log 2> /dev/null | tail -1 | sed -e 's/^\[[ 0-9.]*] //' | awk '{ print \"[\" $1 \" \" $5 \" \" $6 \" \" $7 \"]\"; }' | tr -d '\012\015'`" -fwdprog="`grep 'rcu_torture_fwd_prog n_max_cbs: ' $i/console.log 2> /dev/null | sed -e 's/^\[[^]]*] //' | sort -k3nr | head -1 | awk '{ print $2 " " $3 }'`" +fwdprog="`grep 'rcu_torture_fwd_prog n_max_cbs: ' $i/console.log 2> /dev/null | sed -e 's/^\[[^]]*] //' | sort -k3nr | head -1 | awk '{ print $2 " " $3 }' | tr -d '\015'`" if test -z "$ngps" then echo "$configfile ------- " $stopstate -- GitLab From 010e5773b2050db260395a240e2f7adee3108603 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 2 Dec 2021 11:24:05 -0800 Subject: [PATCH 0166/1586] torture: Allow four-digit repetition numbers for --configs parameter In a clear-cut case of "not thinking big enough", kvm.sh limits the multipliers for torture-test scenarios to three digits. Although this is large enough for any single system that I have ever run rcutorture on, it does become a problem when you want to use kvm-remote.sh to run as many instances of TREE09 as fit on a set of 20 systems with 80 CPUs each. Yes, one could simply say "--configs '800*TREE09 800*TREE09'", but this commit removes the need for that sort of hacky workaround by permitting four-digit repetition numbers, thus allowing "--configs '1600*TREE09'". Five-digit repetition numbers remain off the menu. Should they ever really be needed, they can easily be added! Signed-off-by: Paul E. 
McKenney --- tools/testing/selftests/rcutorture/bin/kvm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 6de0c183db5b9..348ad177a5acc 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -280,7 +280,7 @@ configs_derep= for CF in $configs do case $CF in - [0-9]\**|[0-9][0-9]\**|[0-9][0-9][0-9]\**) + [0-9]\**|[0-9][0-9]\**|[0-9][0-9][0-9]\**|[0-9][0-9][0-9][0-9]\**) config_reps=`echo $CF | sed -e 's/\*.*$//'` CF1=`echo $CF | sed -e 's/^[^*]*\*//'` ;; -- GitLab From 21fbc62576b67d6db66cae88710f82bd51a556e1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 4 Dec 2021 13:53:24 -0800 Subject: [PATCH 0167/1586] torture: Output per-failed-run summary lines from torture.sh Currently, torture.sh lists the failed runs, but it is up to the user to work out what failed. This is especially annoying for KCSAN runs, where RCU's tighter definitions result in failures being reported for other parts of the kernel. This commit therefore outputs "Summary:" lines for each failed run, allowing the user to more quickly identify which failed runs need focused attention. Signed-off-by: Paul E. 
McKenney --- tools/testing/selftests/rcutorture/bin/torture.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index eae88aacca2aa..894f589dd5625 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -414,7 +414,7 @@ nfailures=0 echo FAILURES: | tee -a $T/log if test -s "$T/failures" then - cat "$T/failures" | tee -a $T/log + awk < "$T/failures" -v sq="'" '{ print "echo " sq $0 sq; print "grep Summary: " $2 "/log | sed -e " sq "s/^[^S]*/ /" sq; }' | sh | tee -a $T/log nfailures="`wc -l "$T/failures" | awk '{ print $1 }'`" ret=2 fi -- GitLab From a711aaccf6b389b7fa7f3e35b4f99c437a36b9b2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 4 Dec 2021 21:00:55 -0800 Subject: [PATCH 0168/1586] torture: Make kvm.sh summaries note runs having only KCSAN reports Runs having only KCSAN reports will normally print a summary line containing only a "Bugs:" entry. However, these bugs might or might not be KCSAN reports. This commit therefore flags runs in which all the "Bugs:" entries are KCSAN reports. Signed-off-by: Paul E. McKenney --- .../selftests/rcutorture/bin/console-badness.sh | 2 +- .../testing/selftests/rcutorture/bin/parse-console.sh | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/rcutorture/bin/console-badness.sh b/tools/testing/selftests/rcutorture/bin/console-badness.sh index e6a132df61721..69f8a5958cefc 100755 --- a/tools/testing/selftests/rcutorture/bin/console-badness.sh +++ b/tools/testing/selftests/rcutorture/bin/console-badness.sh @@ -10,7 +10,7 @@ # # Authors: Paul E. McKenney -egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' 
| +egrep 'Badness|WARNING:|Warn|BUG|===========|BUG: KCSAN:|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' | grep -v 'ODEBUG: ' | grep -v 'This means that this is a DEBUG kernel and it is' | grep -v 'Warning: unable to open an initial console' | diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh index 9f624bd53c277..822eb037a0573 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-console.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh @@ -138,6 +138,16 @@ then then summary="$summary Bugs: $n_bugs" fi + n_kcsan=`egrep -c 'BUG: KCSAN: ' $file` + if test "$n_kcsan" -ne 0 + then + if test "$n_bugs" = "$n_kcsan" + then + summary="$summary (all bugs kcsan)" + else + summary="$summary KCSAN: $n_kcsan" + fi + fi n_calltrace=`grep -c 'Call Trace:' $file` if test "$n_calltrace" -ne 0 then -- GitLab From 9a32ed1cf6cdebff7ccd82b91f8021267ed0d9cb Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 6 Dec 2021 09:13:37 -0800 Subject: [PATCH 0169/1586] torture: Indicate which torture.sh runs' bugs are all KCSAN reports This commit further improves torture.sh run summaries by indicating which runs' "Bugs:" counts are all KCSAN reports, and further printing an additional end-of-run summary line when all errors reported in all runs were KCSAN reports. Signed-off-by: Paul E. 
McKenney --- tools/testing/selftests/rcutorture/bin/torture.sh | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index 894f589dd5625..bddce72ea5ce4 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -414,8 +414,14 @@ nfailures=0 echo FAILURES: | tee -a $T/log if test -s "$T/failures" then - awk < "$T/failures" -v sq="'" '{ print "echo " sq $0 sq; print "grep Summary: " $2 "/log | sed -e " sq "s/^[^S]*/ /" sq; }' | sh | tee -a $T/log + awk < "$T/failures" -v sq="'" '{ print "echo " sq $0 sq; print "grep Summary: " $2 "/log | sed -e " sq "s/^[^S]*/ /" sq; }' | sh | tee -a $T/log | tee "$T/failuresum" nfailures="`wc -l "$T/failures" | awk '{ print $1 }'`" + grep "^ Summary: " "$T/failuresum" | + grep -v '^ Summary: Bugs: [0-9]* (all bugs kcsan)$' > "$T/nonkcsan" + if test -s "$T/nonkcsan" + then + nonkcsanbug="yes" + fi ret=2 fi if test "$do_kcsan" = "yes" @@ -424,6 +430,10 @@ then fi echo Started at $startdate, ended at `date`, duration `get_starttime_duration $starttime`. | tee -a $T/log echo Summary: Successes: $nsuccesses Failures: $nfailures. | tee -a $T/log +if test -z "$nonkcsanbug" && test -s "$T/failuresum" +then + echo " All bugs were KCSAN failures." +fi tdir="`cat $T/successes $T/failures | head -1 | awk '{ print $NF }' | sed -e 's,/[^/]\+/*$,,'`" if test -n "$tdir" && test $compress_kasan_vmlinux -gt 0 then -- GitLab From 99c80a96a512f32a234687343ff1c8e5e033976b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 17 Dec 2021 16:14:31 -0800 Subject: [PATCH 0170/1586] torture: Compress KCSAN as well as KASAN vmlinux files Compressing KASAN vmlinux files reduces torture.sh res file size from about 100G to about 50G, which is good, but the KCSAN vmlinux files are also large. 
Compressing them reduces their size from about 700M to about 100M (but of course your mileage may vary). This commit therefore compresses both KASAN and KCSAN vmlinux files. Signed-off-by: Paul E. McKenney --- .../selftests/rcutorture/bin/torture.sh | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index bddce72ea5ce4..a5f1c5fbefe4c 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -37,7 +37,7 @@ configs_scftorture= kcsan_kmake_args= # Default compression, duration, and apportionment. -compress_kasan_vmlinux="`identify_qemu_vcpus`" +compress_concurrency="`identify_qemu_vcpus`" duration_base=10 duration_rcutorture_frac=7 duration_locktorture_frac=1 @@ -67,7 +67,7 @@ function doyesno () { usage () { echo "Usage: $scriptname optional arguments:" - echo " --compress-kasan-vmlinux concurrency" + echo " --compress-concurrency concurrency" echo " --configs-rcutorture \"config-file list w/ repeat factor (3*TINY01)\"" echo " --configs-locktorture \"config-file list w/ repeat factor (10*LOCK01)\"" echo " --configs-scftorture \"config-file list w/ repeat factor (2*CFLIST)\"" @@ -91,9 +91,9 @@ usage () { while test $# -gt 0 do case "$1" in - --compress-kasan-vmlinux) - checkarg --compress-kasan-vmlinux "(concurrency level)" $# "$2" '^[0-9][0-9]*$' '^error' - compress_kasan_vmlinux=$2 + --compress-concurrency) + checkarg --compress-concurrency "(concurrency level)" $# "$2" '^[0-9][0-9]*$' '^error' + compress_concurrency=$2 shift ;; --config-rcutorture|--configs-rcutorture) @@ -435,11 +435,11 @@ then echo " All bugs were KCSAN failures." 
fi tdir="`cat $T/successes $T/failures | head -1 | awk '{ print $NF }' | sed -e 's,/[^/]\+/*$,,'`" -if test -n "$tdir" && test $compress_kasan_vmlinux -gt 0 +if test -n "$tdir" && test $compress_concurrency -gt 0 then # KASAN vmlinux files can approach 1GB in size, so compress them. - echo Looking for KASAN files to compress: `date` > "$tdir/log-xz" 2>&1 - find "$tdir" -type d -name '*-kasan' -print > $T/xz-todo + echo Looking for K[AC]SAN files to compress: `date` > "$tdir/log-xz" 2>&1 + find "$tdir" -type d -name '*-k[ac]san' -print > $T/xz-todo ncompresses=0 batchno=1 if test -s $T/xz-todo @@ -457,7 +457,7 @@ then do xz "$j" >> "$tdir/log-xz" 2>&1 & ncompresses=$((ncompresses+1)) - if test $ncompresses -ge $compress_kasan_vmlinux + if test $ncompresses -ge $compress_concurrency then echo Waiting for batch $batchno of $ncompresses compressions `date` | tee -a "$tdir/log-xz" | tee -a $T/log wait -- GitLab From 2bc9062e7f394b3c6cbe408a13523c0a4bd13d2a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 20 Dec 2021 20:24:25 -0800 Subject: [PATCH 0171/1586] torture: Make kvm-remote.sh try multiple times to download tarball This commit ups the retries for downloading the build-product tarball to a given remote system from once to five times, the better to handle transient network failures. Signed-off-by: Paul E. McKenney --- .../selftests/rcutorture/bin/kvm-remote.sh | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh index e09b1bc787084..29b068a55b466 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh @@ -155,18 +155,23 @@ do echo Downloading tarball to $i `date` | tee -a "$oldrun/remote-log" cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -" ret=$? 
- if test "$ret" -ne 0 - then - echo Unable to download $T/binres.tgz to system $i, waiting and then retrying. | tee -a "$oldrun/remote-log" + tries=0 + while test "$ret" -ne 0 + do + echo Unable to download $T/binres.tgz to system $i, waiting and then retrying. $tries prior retries. | tee -a "$oldrun/remote-log" sleep 60 cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -" ret=$? if test "$ret" -ne 0 then - echo Unable to download $T/binres.tgz to system $i, giving up. | tee -a "$oldrun/remote-log" - exit 10 + if test "$tries" > 5 + then + echo Unable to download $T/binres.tgz to system $i, giving up. | tee -a "$oldrun/remote-log" + exit 10 + fi fi - fi + tries=$((tries+1)) + done done # Function to check for presence of a file on the specified system. -- GitLab From b376005eb3a8aedcde437d3d495418d63ce5451c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 27 Dec 2021 21:21:35 -0800 Subject: [PATCH 0172/1586] torture: Print only one summary line per run The torture.sh script currently duplicates the summary lines, getting one during the run phase and one during the summary phase of each run. This commit therefore removes the run phase from consideration so as to get only one summary line per run. Signed-off-by: Paul E. 
McKenney --- tools/testing/selftests/rcutorture/bin/torture.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index a5f1c5fbefe4c..d1cb60085d8f2 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -414,7 +414,7 @@ nfailures=0 echo FAILURES: | tee -a $T/log if test -s "$T/failures" then - awk < "$T/failures" -v sq="'" '{ print "echo " sq $0 sq; print "grep Summary: " $2 "/log | sed -e " sq "s/^[^S]*/ /" sq; }' | sh | tee -a $T/log | tee "$T/failuresum" + awk < "$T/failures" -v sq="'" '{ print "echo " sq $0 sq; print "sed -e " sq "1,/^ --- .* Test summary:$/d" sq " " $2 "/log | grep Summary: | sed -e " sq "s/^[^S]*/ /" sq; }' | sh | tee -a $T/log | tee "$T/failuresum" nfailures="`wc -l "$T/failures" | awk '{ print $1 }'`" grep "^ Summary: " "$T/failuresum" | grep -v '^ Summary: Bugs: [0-9]* (all bugs kcsan)$' > "$T/nonkcsan" -- GitLab From e31ccc1ddd2ad3e14f02a53cfa22e6b2b98c6dab Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 18 Jan 2022 15:40:49 -0800 Subject: [PATCH 0173/1586] torture: Make kvm-find-errors.sh notice missing vmlinux file Currently, an obtuse compiler diagnostic can fool kvm-find-errors.sh into believing that the build was successful. This commit therefore adds a check for a missing vmlinux file. Note that in the case of repeated torture-test scenarios ("--configs '2*TREE01'"), the vmlinux file will only be present in the first directory, that is, in TREE01 but not TREE01.2. Link: https://lore.kernel.org/lkml/36bd91e4-8eda-5677-7fde-40295932a640@molgen.mpg.de/ Reported-by: Paul Menzel Signed-off-by: Paul E. 
McKenney --- tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh index 2e9e9e2eedb69..5f682fc892dd4 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh @@ -30,10 +30,16 @@ editor=${EDITOR-vi} files= for i in ${rundir}/*/Make.out do + scenariodir="`dirname $i`" + scenariobasedir="`echo ${scenariodir} | sed -e 's/\.[0-9]*$//'`" if egrep -q "error:|warning:|^ld: .*undefined reference to" < $i then egrep "error:|warning:|^ld: .*undefined reference to" < $i > $i.diags files="$files $i.diags $i" + elif ! test -f ${scenariobasedir}/vmlinux + then + echo No ${scenariobasedir}/vmlinux file > $i.diags + files="$files $i.diags $i" fi done if test -n "$files" -- GitLab From a7d89cfb8e1269cb6d22453adba56b8d0218589f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 25 Jan 2022 21:08:55 -0800 Subject: [PATCH 0174/1586] torture: Change KVM environment variable to RCUTORTURE The torture-test scripting's long-standing use of KVM as the environment variable tracking the pathname of the rcutorture directory now conflicts with allmodconfig builds due to the virt/kvm/Makefile.kvm file's use of this as a makefile variable. This commit therefore changes the torture-test scripting from KVM to RCUTORTURE, avoiding the name conflict. Reported-by: Zhouyi Zhou Tested-by: Zhouyi Zhou Signed-off-by: Paul E. 
McKenney --- .../testing/selftests/rcutorture/bin/kvm-again.sh | 4 ++-- .../selftests/rcutorture/bin/kvm-check-branches.sh | 4 ++-- .../selftests/rcutorture/bin/kvm-end-run-stats.sh | 4 ++-- .../testing/selftests/rcutorture/bin/kvm-remote.sh | 8 ++++---- tools/testing/selftests/rcutorture/bin/kvm.sh | 14 +++++++------- tools/testing/selftests/rcutorture/bin/torture.sh | 4 ++-- 6 files changed, 19 insertions(+), 19 deletions(-) diff --git a/tools/testing/selftests/rcutorture/bin/kvm-again.sh b/tools/testing/selftests/rcutorture/bin/kvm-again.sh index 5a0023d183dac..0941f1ddab658 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-again.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-again.sh @@ -47,8 +47,8 @@ else exit 1 fi -KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM -PATH=${KVM}/bin:$PATH; export PATH +RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE +PATH=${RCUTORTURE}/bin:$PATH; export PATH . functions.sh dryrun= diff --git a/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh b/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh index 370406bbfeedc..f17000a2ccf1f 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh @@ -49,8 +49,8 @@ fi mkdir $resdir/$ds echo Results directory: $resdir/$ds -KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM -PATH=${KVM}/bin:$PATH; export PATH +RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE +PATH=${RCUTORTURE}/bin:$PATH; export PATH . functions.sh echo Using all `identify_qemu_vcpus` CPUs. 
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh b/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh index e4a00779b8c69..ee886b40a5d2c 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh @@ -22,8 +22,8 @@ T=${TMPDIR-/tmp}/kvm-end-run-stats.sh.$$ trap 'rm -rf $T' 0 mkdir $T -KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM -PATH=${KVM}/bin:$PATH; export PATH +RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE +PATH=${RCUTORTURE}/bin:$PATH; export PATH . functions.sh default_starttime="`get_starttime`" starttime="${2-default_starttime}" diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh index 29b068a55b466..8c4c1e4792d02 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh @@ -19,8 +19,8 @@ then exit 1 fi -KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM -PATH=${KVM}/bin:$PATH; export PATH +RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE +PATH=${RCUTORTURE}/bin:$PATH; export PATH . 
functions.sh starttime="`get_starttime`" @@ -108,8 +108,8 @@ else cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log" exit 2 fi - cp -a "$rundir" "$KVM/res/" - oldrun="$KVM/res/$ds" + cp -a "$rundir" "$RCUTORTURE/res/" + oldrun="$RCUTORTURE/res/$ds" fi echo | tee -a "$oldrun/remote-log" echo " ----" kvm-again.sh output: "(`date`)" | tee -a "$oldrun/remote-log" diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 348ad177a5acc..55b2c15332827 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -25,15 +25,15 @@ LANG=en_US.UTF-8; export LANG dur=$((30*60)) dryrun="" -KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM -PATH=${KVM}/bin:$PATH; export PATH +RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE +PATH=${RCUTORTURE}/bin:$PATH; export PATH . functions.sh TORTURE_ALLOTED_CPUS="`identify_qemu_vcpus`" TORTURE_DEFCONFIG=defconfig TORTURE_BOOT_IMAGE="" TORTURE_BUILDONLY= -TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD +TORTURE_INITRD="$RCUTORTURE/initrd"; export TORTURE_INITRD TORTURE_KCONFIG_ARG="" TORTURE_KCONFIG_GDB_ARG="" TORTURE_BOOT_GDB_ARG="" @@ -262,7 +262,7 @@ else exit 1 fi -CONFIGFRAG=${KVM}/configs/${TORTURE_SUITE}; export CONFIGFRAG +CONFIGFRAG=${RCUTORTURE}/configs/${TORTURE_SUITE}; export CONFIGFRAG defaultconfigs="`tr '\012' ' ' < $CONFIGFRAG/CFLIST`" if test -z "$configs" @@ -272,7 +272,7 @@ fi if test -z "$resdir" then - resdir=$KVM/res + resdir=$RCUTORTURE/res fi # Create a file of test-name/#cpus pairs, sorted by decreasing #cpus. @@ -386,7 +386,7 @@ END { # Generate a script to execute the tests in appropriate batches. 
cat << ___EOF___ > $T/script CONFIGFRAG="$CONFIGFRAG"; export CONFIGFRAG -KVM="$KVM"; export KVM +RCUTORTURE="$RCUTORTURE"; export RCUTORTURE PATH="$PATH"; export PATH TORTURE_ALLOTED_CPUS="$TORTURE_ALLOTED_CPUS"; export TORTURE_ALLOTED_CPUS TORTURE_BOOT_IMAGE="$TORTURE_BOOT_IMAGE"; export TORTURE_BOOT_IMAGE @@ -569,7 +569,7 @@ ___EOF___ awk < $T/cfgcpu.pack \ -v TORTURE_BUILDONLY="$TORTURE_BUILDONLY" \ -v CONFIGDIR="$CONFIGFRAG/" \ - -v KVM="$KVM" \ + -v RCUTORTURE="$RCUTORTURE" \ -v ncpus=$cpus \ -v jitter="$jitter" \ -v rd=$resdir/$ds/ \ diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index d1cb60085d8f2..e00e60efb2310 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -13,8 +13,8 @@ scriptname=$0 args="$*" -KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM -PATH=${KVM}/bin:$PATH; export PATH +RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE +PATH=${RCUTORTURE}/bin:$PATH; export PATH . functions.sh TORTURE_ALLOTED_CPUS="`identify_qemu_vcpus`" -- GitLab From e2b665f612ca2ddc61c3d54817a3a780aee6b251 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 1 Feb 2022 14:00:08 -0500 Subject: [PATCH 0175/1586] tools/memory-model: Explain syntactic and semantic dependencies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Paul Heidekrüger pointed out that the Linux Kernel Memory Model documentation doesn't mention the distinction between syntactic and semantic dependencies. This is an important difference, because the compiler can easily break dependencies that are only syntactic, not semantic. This patch adds a few paragraphs to the LKMM documentation explaining these issues and illustrating how they can matter. Suggested-by: Paul Heidekrüger Reviewed-by: Akira Yokosawa Signed-off-by: Alan Stern Signed-off-by: Paul E. 
McKenney --- .../Documentation/explanation.txt | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/tools/memory-model/Documentation/explanation.txt b/tools/memory-model/Documentation/explanation.txt index 394ee57d58f2f..ee819a402b698 100644 --- a/tools/memory-model/Documentation/explanation.txt +++ b/tools/memory-model/Documentation/explanation.txt @@ -485,6 +485,57 @@ have R ->po X. It wouldn't make sense for a computation to depend somehow on a value that doesn't get loaded from shared memory until later in the code! +Here's a trick question: When is a dependency not a dependency? Answer: +When it is purely syntactic rather than semantic. We say a dependency +between two accesses is purely syntactic if the second access doesn't +actually depend on the result of the first. Here is a trivial example: + + r1 = READ_ONCE(x); + WRITE_ONCE(y, r1 * 0); + +There appears to be a data dependency from the load of x to the store +of y, since the value to be stored is computed from the value that was +loaded. But in fact, the value stored does not really depend on +anything since it will always be 0. Thus the data dependency is only +syntactic (it appears to exist in the code) but not semantic (the +second access will always be the same, regardless of the value of the +first access). Given code like this, a compiler could simply discard +the value returned by the load from x, which would certainly destroy +any dependency. (The compiler is not permitted to eliminate entirely +the load generated for a READ_ONCE() -- that's one of the nice +properties of READ_ONCE() -- but it is allowed to ignore the load's +value.) + +It's natural to object that no one in their right mind would write +code like the above. However, macro expansions can easily give rise +to this sort of thing, in ways that often are not apparent to the +programmer. + +Another mechanism that can lead to purely syntactic dependencies is +related to the notion of "undefined behavior". 
Certain program +behaviors are called "undefined" in the C language specification, +which means that when they occur there are no guarantees at all about +the outcome. Consider the following example: + + int a[1]; + int i; + + r1 = READ_ONCE(i); + r2 = READ_ONCE(a[r1]); + +Access beyond the end or before the beginning of an array is one kind +of undefined behavior. Therefore the compiler doesn't have to worry +about what will happen if r1 is nonzero, and it can assume that r1 +will always be zero regardless of the value actually loaded from i. +(If the assumption turns out to be wrong the resulting behavior will +be undefined anyway, so the compiler doesn't care!) Thus the value +from the load can be discarded, breaking the address dependency. + +The LKMM is unaware that purely syntactic dependencies are different +from semantic dependencies and therefore mistakenly predicts that the +accesses in the two examples above will be ordered. This is another +example of how the compiler can undermine the memory model. Be warned. + THE READS-FROM RELATION: rf, rfi, and rfe ----------------------------------------- -- GitLab From fc153c1c58cb8c3bb3b443b4d7dc3211ff5f65fc Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sun, 5 Dec 2021 22:38:15 -0500 Subject: [PATCH 0176/1586] clocksource: Add a Kconfig option for WATCHDOG_MAX_SKEW A watchdog maximum skew of 100us may still be too small for some systems or archs. It may also be too small when some kernel debug config options are enabled. So add a new Kconfig option CLOCKSOURCE_WATCHDOG_MAX_SKEW_US to allow kernel builders to have more control on the threshold for marking clocksource as unstable. Signed-off-by: Waiman Long Signed-off-by: Paul E. 
McKenney --- kernel/time/Kconfig | 9 +++++++++ kernel/time/clocksource.c | 8 +++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 04bfd62f5e5ca..27b7868b5c30d 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -181,5 +181,14 @@ config HIGH_RES_TIMERS hardware is not capable then this option only increases the size of the kernel image. +config CLOCKSOURCE_WATCHDOG_MAX_SKEW_US + int "Clocksource watchdog maximum allowable skew (in μs)" + depends on CLOCKSOURCE_WATCHDOG + range 50 1000 + default 100 + help + Specify the maximum amount of allowable watchdog skew in + microseconds before reporting the clocksource to be unstable. + endmenu endif diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 1cf73807b4503..95d7ca35bdf2c 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -107,7 +107,13 @@ static u64 suspend_start; * This delay could be due to SMIs, NMIs, or to VCPU preemptions. Used as * a lower bound for cs->uncertainty_margin values when registering clocks. */ -#define WATCHDOG_MAX_SKEW (100 * NSEC_PER_USEC) +#ifdef CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US +#define MAX_SKEW_USEC CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US +#else +#define MAX_SKEW_USEC 100 +#endif + +#define WATCHDOG_MAX_SKEW (MAX_SKEW_USEC * NSEC_PER_USEC) #ifdef CONFIG_CLOCKSOURCE_WATCHDOG static void clocksource_watchdog_work(struct work_struct *work); -- GitLab From fbdee71bb5d8d054e1bdb5af4c540f2cb86fe296 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 4 Jan 2022 08:16:47 +0100 Subject: [PATCH 0177/1586] block: deprecate autoloading based on dev_t Make the legacy dev_t based autoloading optional and add a deprecation warning. This kind of autoloading ceased to be useful about 20 years ago. 
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220104071647.164918-1-hch@lst.de Signed-off-by: Jens Axboe --- block/Kconfig | 12 ++++++++++++ block/bdev.c | 9 ++++++--- block/genhd.c | 6 ++++++ 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/block/Kconfig b/block/Kconfig index d5d4197b7ed2d..205f8d01c6952 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -26,6 +26,18 @@ menuconfig BLOCK if BLOCK +config BLOCK_LEGACY_AUTOLOAD + bool "Legacy autoloading support" + help + Enable loading modules and creating block device instances based on + accesses through their device special file. This is a historic Linux + feature and makes no sense in a udev world where device files are + created on demand. + + Say N here unless booting or other functionality broke without it, in + which case you should also send a report to your distribution and + linux-block@vger.kernel.org. + config BLK_RQ_ALLOC_TIME bool diff --git a/block/bdev.c b/block/bdev.c index 102837a370517..c687726445660 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -733,12 +733,15 @@ struct block_device *blkdev_get_no_open(dev_t dev) struct inode *inode; inode = ilookup(blockdev_superblock, dev); - if (!inode) { + if (!inode && IS_ENABLED(CONFIG_BLOCK_LEGACY_AUTOLOAD)) { blk_request_module(dev); inode = ilookup(blockdev_superblock, dev); - if (!inode) - return NULL; + if (inode) + pr_warn_ratelimited( +"block device autoloading is deprecated. 
It will be removed in Linux 5.19\n"); } + if (!inode) + return NULL; /* switch from the inode reference to a device mode one: */ bdev = &BDEV_I(inode)->bdev; diff --git a/block/genhd.c b/block/genhd.c index 626c8406f21a6..6ae990ff02660 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -185,7 +185,9 @@ static struct blk_major_name { struct blk_major_name *next; int major; char name[16]; +#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD void (*probe)(dev_t devt); +#endif } *major_names[BLKDEV_MAJOR_HASH_SIZE]; static DEFINE_MUTEX(major_names_lock); static DEFINE_SPINLOCK(major_names_spinlock); @@ -275,7 +277,9 @@ int __register_blkdev(unsigned int major, const char *name, } p->major = major; +#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD p->probe = probe; +#endif strlcpy(p->name, name, sizeof(p->name)); p->next = NULL; index = major_to_index(major); @@ -679,6 +683,7 @@ static ssize_t disk_badblocks_store(struct device *dev, return badblocks_store(disk->bb, page, len, 0); } +#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD void blk_request_module(dev_t devt) { unsigned int major = MAJOR(devt); @@ -698,6 +703,7 @@ void blk_request_module(dev_t devt) /* Make old-style 2.4 aliases work */ request_module("block-major-%d", MAJOR(devt)); } +#endif /* CONFIG_BLOCK_LEGACY_AUTOLOAD */ /* * print a full list of all partitions - intended for places where the root -- GitLab From 926597ffce0e3e2f785475df18e1636194209910 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:39:11 +0100 Subject: [PATCH 0178/1586] block: move disk_{block,unblock,flush}_events to blk.h No need to have these declarations in a public header. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Martin K. 
Petersen Link: https://lore.kernel.org/r/20220124093913.742411-2-hch@lst.de Signed-off-by: Jens Axboe --- block/blk.h | 3 +++ include/linux/genhd.h | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/block/blk.h b/block/blk.h index 8bd43b3ad33d5..2cba50d7e6cb1 100644 --- a/block/blk.h +++ b/block/blk.h @@ -445,6 +445,9 @@ int disk_alloc_events(struct gendisk *disk); void disk_add_events(struct gendisk *disk); void disk_del_events(struct gendisk *disk); void disk_release_events(struct gendisk *disk); +void disk_block_events(struct gendisk *disk); +void disk_unblock_events(struct gendisk *disk); +void disk_flush_events(struct gendisk *disk, unsigned int mask); extern struct device_attribute dev_attr_events; extern struct device_attribute dev_attr_events_async; extern struct device_attribute dev_attr_events_poll_msecs; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 6906a45bc761a..504f9a6674ace 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -185,9 +185,6 @@ static inline int bdev_read_only(struct block_device *bdev) return bdev->bd_read_only || get_disk_ro(bdev->bd_disk); } -extern void disk_block_events(struct gendisk *disk); -extern void disk_unblock_events(struct gendisk *disk); -extern void disk_flush_events(struct gendisk *disk, unsigned int mask); bool set_capacity_and_notify(struct gendisk *disk, sector_t size); bool disk_force_media_change(struct gendisk *disk, unsigned int events); -- GitLab From e7243285c0fc87054990fcde630583586ff8ed5f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:39:12 +0100 Subject: [PATCH 0179/1586] block: move blk_drop_partitions to blk.h No need to have this declaration in a public header. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Martin K. 
Petersen Link: https://lore.kernel.org/r/20220124093913.742411-3-hch@lst.de Signed-off-by: Jens Axboe --- block/blk.h | 1 + include/linux/genhd.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk.h b/block/blk.h index 2cba50d7e6cb1..800c5ae387a0b 100644 --- a/block/blk.h +++ b/block/blk.h @@ -426,6 +426,7 @@ int bdev_add_partition(struct gendisk *disk, int partno, sector_t start, int bdev_del_partition(struct gendisk *disk, int partno); int bdev_resize_partition(struct gendisk *disk, int partno, sector_t start, sector_t length); +void blk_drop_partitions(struct gendisk *disk); int bio_add_hw_page(struct request_queue *q, struct bio *bio, struct page *page, unsigned int len, unsigned int offset, diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 504f9a6674ace..aa4bd985dbe51 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -219,7 +219,6 @@ static inline u64 sb_bdev_nr_blocks(struct super_block *sb) } int bdev_disk_changed(struct gendisk *disk, bool invalidate); -void blk_drop_partitions(struct gendisk *disk); struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, struct lock_class_key *lkclass); -- GitLab From 322cbb50de711814c42fb088f6d31901502c711a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:39:13 +0100 Subject: [PATCH 0180/1586] block: remove genhd.h There is no good reason to keep genhd.h separate from the main blkdev.h header that includes it. So fold the contents of genhd.h into blkdev.h and remove genhd.h entirely. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Martin K. 
Petersen Link: https://lore.kernel.org/r/20220124093913.742411-4-hch@lst.de Signed-off-by: Jens Axboe --- Documentation/block/capability.rst | 2 +- arch/m68k/atari/stdma.c | 1 - arch/m68k/bvme6000/config.c | 1 - arch/m68k/emu/nfblock.c | 1 - arch/m68k/kernel/setup_mm.c | 1 - arch/m68k/mvme147/config.c | 1 - arch/m68k/mvme16x/config.c | 1 - block/blk-cgroup.c | 1 - block/disk-events.c | 2 +- block/genhd.c | 1 - block/holder.c | 2 +- block/partitions/check.h | 1 - block/partitions/core.c | 1 - block/partitions/efi.h | 1 - block/partitions/ldm.h | 1 - block/sed-opal.c | 2 +- drivers/base/class.c | 2 +- drivers/base/core.c | 2 +- drivers/base/devtmpfs.c | 2 +- drivers/block/aoe/aoeblk.c | 1 - drivers/block/aoe/aoecmd.c | 1 - drivers/block/drbd/drbd_int.h | 1 - drivers/block/mtip32xx/mtip32xx.c | 1 - drivers/block/mtip32xx/mtip32xx.h | 1 - drivers/block/rnbd/rnbd-srv-sysfs.c | 1 - drivers/block/sunvdc.c | 1 - drivers/block/zram/zram_drv.c | 1 - drivers/cdrom/gdrom.c | 1 - drivers/char/random.c | 2 +- drivers/md/bcache/super.c | 1 - drivers/md/dm-core.h | 1 - drivers/mtd/mtdswap.c | 2 +- drivers/mtd/nand/raw/sharpsl.c | 1 - drivers/nvdimm/blk.c | 1 - drivers/nvdimm/btt.c | 1 - drivers/nvdimm/btt_devs.c | 1 - drivers/nvdimm/bus.c | 1 - drivers/nvdimm/pfn_devs.c | 1 - drivers/s390/block/dasd_int.h | 1 - drivers/s390/block/scm_blk.c | 1 - drivers/s390/block/scm_blk.h | 1 - drivers/scsi/scsi_debug.c | 1 - drivers/scsi/scsicam.c | 1 - drivers/scsi/sd.c | 1 - drivers/scsi/sr.h | 1 - drivers/target/target_core_iblock.c | 1 - drivers/target/target_core_pscsi.c | 1 - fs/btrfs/check-integrity.c | 1 - fs/dax.c | 1 - fs/gfs2/sys.c | 2 +- fs/hfs/mdb.c | 2 +- fs/hfsplus/wrapper.c | 1 - fs/ksmbd/vfs.c | 1 - fs/nfs/blocklayout/rpc_pipefs.c | 1 - fs/nfsd/blocklayout.c | 1 - include/linux/blkdev.h | 273 +++++++++++++++++++++++++- include/linux/genhd.h | 287 ---------------------------- include/linux/part_stat.h | 2 +- init/do_mounts.c | 1 - kernel/power/hibernate.c | 1 - 
kernel/power/swap.c | 1 - security/integrity/ima/ima_policy.c | 1 - 62 files changed, 282 insertions(+), 350 deletions(-) delete mode 100644 include/linux/genhd.h diff --git a/Documentation/block/capability.rst b/Documentation/block/capability.rst index 160a5148b915f..2ae7f064736ad 100644 --- a/Documentation/block/capability.rst +++ b/Documentation/block/capability.rst @@ -7,4 +7,4 @@ This file documents the sysfs file ``block//capability``. ``capability`` is a bitfield, printed in hexadecimal, indicating which capabilities a specific block device supports: -.. kernel-doc:: include/linux/genhd.h +.. kernel-doc:: include/linux/blkdev.h diff --git a/arch/m68k/atari/stdma.c b/arch/m68k/atari/stdma.c index ba65f942d0c78..ce6818eff75ef 100644 --- a/arch/m68k/atari/stdma.c +++ b/arch/m68k/atari/stdma.c @@ -30,7 +30,6 @@ #include #include -#include #include #include #include diff --git a/arch/m68k/bvme6000/config.c b/arch/m68k/bvme6000/config.c index 0c6feafbbd110..0fe0f3e888fb3 100644 --- a/arch/m68k/bvme6000/config.c +++ b/arch/m68k/bvme6000/config.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c index 9c57b245dc12a..267b02cc5655b 100644 --- a/arch/m68k/emu/nfblock.c +++ b/arch/m68k/emu/nfblock.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c index 49e573b943268..ee268055bdce3 100644 --- a/arch/m68k/kernel/setup_mm.c +++ b/arch/m68k/kernel/setup_mm.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c index dfd6202fd403e..db1430dc411f4 100644 --- a/arch/m68k/mvme147/config.c +++ b/arch/m68k/mvme147/config.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c index 
b4422c2dfbbf4..45a07ab3123ab 100644 --- a/arch/m68k/mvme16x/config.c +++ b/arch/m68k/mvme16x/config.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 650f7e27989f1..671debbae9413 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include diff --git a/block/disk-events.c b/block/disk-events.c index 8d5496e7592a5..aee25a7e1ab7d 100644 --- a/block/disk-events.c +++ b/block/disk-events.c @@ -4,7 +4,7 @@ */ #include #include -#include +#include #include "blk.h" struct disk_events { diff --git a/block/genhd.c b/block/genhd.c index 6ae990ff02660..9589d1d59afab 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include diff --git a/block/holder.c b/block/holder.c index 27cddce1b4461..8d750281a1cd9 100644 --- a/block/holder.c +++ b/block/holder.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only -#include +#include #include struct bd_holder_disk { diff --git a/block/partitions/check.h b/block/partitions/check.h index d5b28e309d64d..4ffa2359b1a37 100644 --- a/block/partitions/check.h +++ b/block/partitions/check.h @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include #include -#include #include "../blk.h" /* diff --git a/block/partitions/core.c b/block/partitions/core.c index c2a1635922b1c..2ef8dfa1e5c85 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include diff --git a/block/partitions/efi.h b/block/partitions/efi.h index 8cc2b88d0aa85..84b9f36b9e479 100644 --- a/block/partitions/efi.h +++ b/block/partitions/efi.h @@ -13,7 +13,6 @@ #include #include -#include #include #include #include diff --git a/block/partitions/ldm.h b/block/partitions/ldm.h index 8693704dcf5e9..0a747a0c782d5 100644 --- a/block/partitions/ldm.h +++ 
b/block/partitions/ldm.h @@ -14,7 +14,6 @@ #include #include -#include #include #include #include diff --git a/block/sed-opal.c b/block/sed-opal.c index daafadbb88cae..9700197000f20 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/base/class.c b/drivers/base/class.c index 7476f393df977..8feb85e186e3b 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include "base.h" diff --git a/drivers/base/core.c b/drivers/base/core.c index 7bb957b118611..3d6430eb0c6a1 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index f41063ac1aee4..db5a03a0618ea 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 52484bcdedb92..8a91fcac6f829 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 6af111f568e4a..cc11f89a0928f 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index f27d5b0f9a0bb..acb1ad3c06035 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index e6005c2323281..cba956881d55c 100644 --- 
a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index 88f4206310e4c..6816beb45352b 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -15,7 +15,6 @@ #include #include #include -#include /* Offset of Subsystem Device ID in pci confoguration space */ #define PCI_SUBSYSTEM_DEVICEID 0x2E diff --git a/drivers/block/rnbd/rnbd-srv-sysfs.c b/drivers/block/rnbd/rnbd-srv-sysfs.c index 4db98e0e76f0e..feaa76c5a3423 100644 --- a/drivers/block/rnbd/rnbd-srv-sysfs.c +++ b/drivers/block/rnbd/rnbd-srv-sysfs.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 146d85d80e0e7..dd0a1a6fed296 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index cb253d80d72b9..342dbcb3f2208 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index faead41709bcd..8e78b37d0f6a4 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/char/random.c b/drivers/char/random.c index 68613f0b68877..f206c87c62028 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -330,7 +330,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 140f35dc0c457..c31a62b963f00 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -18,7 
+18,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index b855fef4f38a6..72d18c3fbf1f6 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -11,7 +11,6 @@ #include #include -#include #include #include diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c index e86b04bc1d6bb..dc7f1532a37f7 100644 --- a/drivers/mtd/mtdswap.c +++ b/drivers/mtd/mtdswap.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/mtd/nand/raw/sharpsl.c b/drivers/mtd/nand/raw/sharpsl.c index 5612ee628425b..52ce5162538a4 100644 --- a/drivers/mtd/nand/raw/sharpsl.c +++ b/drivers/mtd/nand/raw/sharpsl.c @@ -6,7 +6,6 @@ * Based on Sharp's NAND driver sharp_sl.c */ -#include #include #include #include diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c index 228c33b8d1d69..c1db43524d755 100644 --- a/drivers/nvdimm/blk.c +++ b/drivers/nvdimm/blk.c @@ -6,7 +6,6 @@ #include #include -#include #include #include #include diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index da3f007a12115..cbd994f7f1fe6 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index 8b52e5144f084..e5a58520d3982 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -4,7 +4,6 @@ */ #include #include -#include #include #include #include diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 9dc7f3edd42b1..5bbe31b08581b 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 58eda16f5c534..c31e184bfa45e 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -5,7 +5,6 @@ #include #include #include -#include 
#include #include #include diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 8b458010f88a1..3b7af00a7825f 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -47,7 +47,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c index 61ecdcb2cc6af..2a9c0ddcade59 100644 --- a/drivers/s390/block/scm_blk.c +++ b/drivers/s390/block/scm_blk.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/s390/block/scm_blk.h b/drivers/s390/block/scm_blk.h index a05a4297cfae2..af82b32147741 100644 --- a/drivers/s390/block/scm_blk.h +++ b/drivers/s390/block/scm_blk.h @@ -6,7 +6,6 @@ #include #include #include -#include #include #include diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 2104973a35cd3..911cc72dd7acd 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/scsi/scsicam.c b/drivers/scsi/scsicam.c index 0ffdb8f2995f7..acdc0aceca5ef 100644 --- a/drivers/scsi/scsicam.c +++ b/drivers/scsi/scsicam.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 62eb9921cc947..2d648d27bfd71 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/scsi/sr.h b/drivers/scsi/sr.h index 339c624e04d86..1609f02ed29ac 100644 --- a/drivers/scsi/sr.h +++ b/drivers/scsi/sr.h @@ -18,7 +18,6 @@ #ifndef _SR_H #define _SR_H -#include #include #include diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index bf8ae4825a06e..6045678365a59 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -20,7 +20,6 @@ #include 
#include #include -#include #include #include #include diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 807d06ecadee2..0fae71ac5cc8a 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 7e9f90fa0388b..abac86a758401 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -78,7 +78,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/dax.c b/fs/dax.c index cd03485867a74..ab0978739eaaa 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index a6002b2d146d8..d87ea98cf5350 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include "gfs2.h" #include "incore.h" diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c index 5beb826524354..8082eb01127cd 100644 --- a/fs/hfs/mdb.c +++ b/fs/hfs/mdb.c @@ -9,7 +9,7 @@ */ #include -#include +#include #include #include diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index 51ae6f1eb4a55..4688cc7b36926 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include "hfsplus_fs.h" diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c index 19d36393974ca..9cebb6ba555b6 100644 --- a/fs/ksmbd/vfs.c +++ b/fs/ksmbd/vfs.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c index ef9db135c649c..6c977288cc288 100644 --- a/fs/nfs/blocklayout/rpc_pipefs.c +++ b/fs/nfs/blocklayout/rpc_pipefs.c @@ -27,7 +27,6 @@ */ #include -#include #include #include "blocklayout.h" diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index 
e5c0982a381de..b6d01d51a7465 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -4,7 +4,6 @@ */ #include #include -#include #include #include diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f35aea98bc351..99a4384bb8a56 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1,9 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0 */ +/* + * Portions Copyright (C) 1992 Drew Eckhardt + */ #ifndef _LINUX_BLKDEV_H #define _LINUX_BLKDEV_H -#include -#include +#include +#include +#include #include #include #include @@ -12,11 +16,15 @@ #include #include #include +#include #include #include #include +#include #include #include +#include +#include struct module; struct request_queue; @@ -33,6 +41,10 @@ struct blk_queue_stats; struct blk_stat_callback; struct blk_crypto_profile; +extern const struct device_type disk_type; +extern struct device_type part_type; +extern struct class block_class; + /* Must be consistent with blk_mq_poll_stats_bkt() */ #define BLK_MQ_POLL_STATS_BKTS 16 @@ -45,6 +57,144 @@ struct blk_crypto_profile; */ #define BLKCG_MAX_POLS 6 +#define DISK_MAX_PARTS 256 +#define DISK_NAME_LEN 32 + +#define PARTITION_META_INFO_VOLNAMELTH 64 +/* + * Enough for the string representation of any kind of UUID plus NULL. + * EFI UUID is 36 characters. MSDOS UUID is 11 characters. + */ +#define PARTITION_META_INFO_UUIDLTH (UUID_STRING_LEN + 1) + +struct partition_meta_info { + char uuid[PARTITION_META_INFO_UUIDLTH]; + u8 volname[PARTITION_META_INFO_VOLNAMELTH]; +}; + +/** + * DOC: genhd capability flags + * + * ``GENHD_FL_REMOVABLE``: indicates that the block device gives access to + * removable media. When set, the device remains present even when media is not + * inserted. Shall not be set for devices which are removed entirely when the + * media is removed. + * + * ``GENHD_FL_HIDDEN``: the block device is hidden; it doesn't produce events, + * doesn't appear in sysfs, and can't be opened from userspace or using + * blkdev_get*. 
Used for the underlying components of multipath devices. + * + * ``GENHD_FL_NO_PART``: partition support is disabled. The kernel will not + * scan for partitions from add_disk, and users can't add partitions manually. + * + */ +enum { + GENHD_FL_REMOVABLE = 1 << 0, + GENHD_FL_HIDDEN = 1 << 1, + GENHD_FL_NO_PART = 1 << 2, +}; + +enum { + DISK_EVENT_MEDIA_CHANGE = 1 << 0, /* media changed */ + DISK_EVENT_EJECT_REQUEST = 1 << 1, /* eject requested */ +}; + +enum { + /* Poll even if events_poll_msecs is unset */ + DISK_EVENT_FLAG_POLL = 1 << 0, + /* Forward events to udev */ + DISK_EVENT_FLAG_UEVENT = 1 << 1, + /* Block event polling when open for exclusive write */ + DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE = 1 << 2, +}; + +struct disk_events; +struct badblocks; + +struct blk_integrity { + const struct blk_integrity_profile *profile; + unsigned char flags; + unsigned char tuple_size; + unsigned char interval_exp; + unsigned char tag_size; +}; + +struct gendisk { + /* + * major/first_minor/minors should not be set by any new driver, the + * block core will take care of allocating them automatically. 
+ */ + int major; + int first_minor; + int minors; + + char disk_name[DISK_NAME_LEN]; /* name of major driver */ + + unsigned short events; /* supported events */ + unsigned short event_flags; /* flags related to event processing */ + + struct xarray part_tbl; + struct block_device *part0; + + const struct block_device_operations *fops; + struct request_queue *queue; + void *private_data; + + int flags; + unsigned long state; +#define GD_NEED_PART_SCAN 0 +#define GD_READ_ONLY 1 +#define GD_DEAD 2 +#define GD_NATIVE_CAPACITY 3 + + struct mutex open_mutex; /* open/close mutex */ + unsigned open_partitions; /* number of open partitions */ + + struct backing_dev_info *bdi; + struct kobject *slave_dir; +#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED + struct list_head slave_bdevs; +#endif + struct timer_rand_state *random; + atomic_t sync_io; /* RAID */ + struct disk_events *ev; +#ifdef CONFIG_BLK_DEV_INTEGRITY + struct kobject integrity_kobj; +#endif /* CONFIG_BLK_DEV_INTEGRITY */ +#if IS_ENABLED(CONFIG_CDROM) + struct cdrom_device_info *cdi; +#endif + int node_id; + struct badblocks *bb; + struct lockdep_map lockdep_map; + u64 diskseq; +}; + +static inline bool disk_live(struct gendisk *disk) +{ + return !inode_unhashed(disk->part0->bd_inode); +} + +/* + * The gendisk is refcounted by the part0 block_device, and the bd_device + * therein is also used for device model presentation in sysfs. 
+ */ +#define dev_to_disk(device) \ + (dev_to_bdev(device)->bd_disk) +#define disk_to_dev(disk) \ + (&((disk)->part0->bd_device)) + +#if IS_REACHABLE(CONFIG_CDROM) +#define disk_to_cdi(disk) ((disk)->cdi) +#else +#define disk_to_cdi(disk) NULL +#endif + +static inline dev_t disk_devt(struct gendisk *disk) +{ + return MKDEV(disk->major, disk->first_minor); +} + static inline int blk_validate_block_size(unsigned long bsize) { if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize)) @@ -596,6 +746,118 @@ static inline unsigned int blk_queue_depth(struct request_queue *q) #define for_each_bio(_bio) \ for (; _bio; _bio = _bio->bi_next) +int __must_check device_add_disk(struct device *parent, struct gendisk *disk, + const struct attribute_group **groups); +static inline int __must_check add_disk(struct gendisk *disk) +{ + return device_add_disk(NULL, disk, NULL); +} +void del_gendisk(struct gendisk *gp); +void invalidate_disk(struct gendisk *disk); +void set_disk_ro(struct gendisk *disk, bool read_only); +void disk_uevent(struct gendisk *disk, enum kobject_action action); + +static inline int get_disk_ro(struct gendisk *disk) +{ + return disk->part0->bd_read_only || + test_bit(GD_READ_ONLY, &disk->state); +} + +static inline int bdev_read_only(struct block_device *bdev) +{ + return bdev->bd_read_only || get_disk_ro(bdev->bd_disk); +} + +bool set_capacity_and_notify(struct gendisk *disk, sector_t size); +bool disk_force_media_change(struct gendisk *disk, unsigned int events); + +void add_disk_randomness(struct gendisk *disk) __latent_entropy; +void rand_initialize_disk(struct gendisk *disk); + +static inline sector_t get_start_sect(struct block_device *bdev) +{ + return bdev->bd_start_sect; +} + +static inline sector_t bdev_nr_sectors(struct block_device *bdev) +{ + return bdev->bd_nr_sectors; +} + +static inline loff_t bdev_nr_bytes(struct block_device *bdev) +{ + return (loff_t)bdev_nr_sectors(bdev) << SECTOR_SHIFT; +} + +static inline sector_t get_capacity(struct 
gendisk *disk) +{ + return bdev_nr_sectors(disk->part0); +} + +static inline u64 sb_bdev_nr_blocks(struct super_block *sb) +{ + return bdev_nr_sectors(sb->s_bdev) >> + (sb->s_blocksize_bits - SECTOR_SHIFT); +} + +int bdev_disk_changed(struct gendisk *disk, bool invalidate); + +struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, + struct lock_class_key *lkclass); +void put_disk(struct gendisk *disk); +struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass); + +/** + * blk_alloc_disk - allocate a gendisk structure + * @node_id: numa node to allocate on + * + * Allocate and pre-initialize a gendisk structure for use with BIO based + * drivers. + * + * Context: can sleep + */ +#define blk_alloc_disk(node_id) \ +({ \ + static struct lock_class_key __key; \ + \ + __blk_alloc_disk(node_id, &__key); \ +}) +void blk_cleanup_disk(struct gendisk *disk); + +int __register_blkdev(unsigned int major, const char *name, + void (*probe)(dev_t devt)); +#define register_blkdev(major, name) \ + __register_blkdev(major, name, NULL) +void unregister_blkdev(unsigned int major, const char *name); + +bool bdev_check_media_change(struct block_device *bdev); +int __invalidate_device(struct block_device *bdev, bool kill_dirty); +void set_capacity(struct gendisk *disk, sector_t size); + +#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED +int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); +void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk); +int bd_register_pending_holders(struct gendisk *disk); +#else +static inline int bd_link_disk_holder(struct block_device *bdev, + struct gendisk *disk) +{ + return 0; +} +static inline void bd_unlink_disk_holder(struct block_device *bdev, + struct gendisk *disk) +{ +} +static inline int bd_register_pending_holders(struct gendisk *disk) +{ + return 0; +} +#endif /* CONFIG_BLOCK_HOLDER_DEPRECATED */ + +dev_t part_devt(struct gendisk *disk, u8 partno); +void inc_diskseq(struct gendisk 
*disk); +dev_t blk_lookup_devt(const char *name, int partno); +void blk_request_module(dev_t devt); extern int blk_register_queue(struct gendisk *disk); extern void blk_unregister_queue(struct gendisk *disk); @@ -1311,6 +1573,7 @@ void invalidate_bdev(struct block_device *bdev); int sync_blockdev(struct block_device *bdev); int sync_blockdev_nowait(struct block_device *bdev); void sync_bdevs(bool wait); +void printk_all_partitions(void); #else static inline void invalidate_bdev(struct block_device *bdev) { @@ -1326,7 +1589,11 @@ static inline int sync_blockdev_nowait(struct block_device *bdev) static inline void sync_bdevs(bool wait) { } -#endif +static inline void printk_all_partitions(void) +{ +} +#endif /* CONFIG_BLOCK */ + int fsync_bdev(struct block_device *bdev); int freeze_bdev(struct block_device *bdev); diff --git a/include/linux/genhd.h b/include/linux/genhd.h deleted file mode 100644 index aa4bd985dbe51..0000000000000 --- a/include/linux/genhd.h +++ /dev/null @@ -1,287 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_GENHD_H -#define _LINUX_GENHD_H - -/* - * genhd.h Copyright (C) 1992 Drew Eckhardt - * Generic hard disk header file by - * Drew Eckhardt - * - * - */ - -#include -#include -#include -#include -#include -#include - -extern const struct device_type disk_type; -extern struct device_type part_type; -extern struct class block_class; - -#define DISK_MAX_PARTS 256 -#define DISK_NAME_LEN 32 - -#define PARTITION_META_INFO_VOLNAMELTH 64 -/* - * Enough for the string representation of any kind of UUID plus NULL. - * EFI UUID is 36 characters. MSDOS UUID is 11 characters. - */ -#define PARTITION_META_INFO_UUIDLTH (UUID_STRING_LEN + 1) - -struct partition_meta_info { - char uuid[PARTITION_META_INFO_UUIDLTH]; - u8 volname[PARTITION_META_INFO_VOLNAMELTH]; -}; - -/** - * DOC: genhd capability flags - * - * ``GENHD_FL_REMOVABLE``: indicates that the block device gives access to - * removable media. 
When set, the device remains present even when media is not - * inserted. Shall not be set for devices which are removed entirely when the - * media is removed. - * - * ``GENHD_FL_HIDDEN``: the block device is hidden; it doesn't produce events, - * doesn't appear in sysfs, and can't be opened from userspace or using - * blkdev_get*. Used for the underlying components of multipath devices. - * - * ``GENHD_FL_NO_PART``: partition support is disabled. The kernel will not - * scan for partitions from add_disk, and users can't add partitions manually. - * - */ -enum { - GENHD_FL_REMOVABLE = 1 << 0, - GENHD_FL_HIDDEN = 1 << 1, - GENHD_FL_NO_PART = 1 << 2, -}; - -enum { - DISK_EVENT_MEDIA_CHANGE = 1 << 0, /* media changed */ - DISK_EVENT_EJECT_REQUEST = 1 << 1, /* eject requested */ -}; - -enum { - /* Poll even if events_poll_msecs is unset */ - DISK_EVENT_FLAG_POLL = 1 << 0, - /* Forward events to udev */ - DISK_EVENT_FLAG_UEVENT = 1 << 1, - /* Block event polling when open for exclusive write */ - DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE = 1 << 2, -}; - -struct disk_events; -struct badblocks; - -struct blk_integrity { - const struct blk_integrity_profile *profile; - unsigned char flags; - unsigned char tuple_size; - unsigned char interval_exp; - unsigned char tag_size; -}; - -struct gendisk { - /* - * major/first_minor/minors should not be set by any new driver, the - * block core will take care of allocating them automatically. 
- */ - int major; - int first_minor; - int minors; - - char disk_name[DISK_NAME_LEN]; /* name of major driver */ - - unsigned short events; /* supported events */ - unsigned short event_flags; /* flags related to event processing */ - - struct xarray part_tbl; - struct block_device *part0; - - const struct block_device_operations *fops; - struct request_queue *queue; - void *private_data; - - int flags; - unsigned long state; -#define GD_NEED_PART_SCAN 0 -#define GD_READ_ONLY 1 -#define GD_DEAD 2 -#define GD_NATIVE_CAPACITY 3 - - struct mutex open_mutex; /* open/close mutex */ - unsigned open_partitions; /* number of open partitions */ - - struct backing_dev_info *bdi; - struct kobject *slave_dir; -#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED - struct list_head slave_bdevs; -#endif - struct timer_rand_state *random; - atomic_t sync_io; /* RAID */ - struct disk_events *ev; -#ifdef CONFIG_BLK_DEV_INTEGRITY - struct kobject integrity_kobj; -#endif /* CONFIG_BLK_DEV_INTEGRITY */ -#if IS_ENABLED(CONFIG_CDROM) - struct cdrom_device_info *cdi; -#endif - int node_id; - struct badblocks *bb; - struct lockdep_map lockdep_map; - u64 diskseq; -}; - -static inline bool disk_live(struct gendisk *disk) -{ - return !inode_unhashed(disk->part0->bd_inode); -} - -/* - * The gendisk is refcounted by the part0 block_device, and the bd_device - * therein is also used for device model presentation in sysfs. 
- */ -#define dev_to_disk(device) \ - (dev_to_bdev(device)->bd_disk) -#define disk_to_dev(disk) \ - (&((disk)->part0->bd_device)) - -#if IS_REACHABLE(CONFIG_CDROM) -#define disk_to_cdi(disk) ((disk)->cdi) -#else -#define disk_to_cdi(disk) NULL -#endif - -static inline dev_t disk_devt(struct gendisk *disk) -{ - return MKDEV(disk->major, disk->first_minor); -} - -void disk_uevent(struct gendisk *disk, enum kobject_action action); - -/* block/genhd.c */ -int __must_check device_add_disk(struct device *parent, struct gendisk *disk, - const struct attribute_group **groups); -static inline int __must_check add_disk(struct gendisk *disk) -{ - return device_add_disk(NULL, disk, NULL); -} -extern void del_gendisk(struct gendisk *gp); - -void invalidate_disk(struct gendisk *disk); - -void set_disk_ro(struct gendisk *disk, bool read_only); - -static inline int get_disk_ro(struct gendisk *disk) -{ - return disk->part0->bd_read_only || - test_bit(GD_READ_ONLY, &disk->state); -} - -static inline int bdev_read_only(struct block_device *bdev) -{ - return bdev->bd_read_only || get_disk_ro(bdev->bd_disk); -} - -bool set_capacity_and_notify(struct gendisk *disk, sector_t size); -bool disk_force_media_change(struct gendisk *disk, unsigned int events); - -/* drivers/char/random.c */ -extern void add_disk_randomness(struct gendisk *disk) __latent_entropy; -extern void rand_initialize_disk(struct gendisk *disk); - -static inline sector_t get_start_sect(struct block_device *bdev) -{ - return bdev->bd_start_sect; -} - -static inline sector_t bdev_nr_sectors(struct block_device *bdev) -{ - return bdev->bd_nr_sectors; -} - -static inline loff_t bdev_nr_bytes(struct block_device *bdev) -{ - return (loff_t)bdev_nr_sectors(bdev) << SECTOR_SHIFT; -} - -static inline sector_t get_capacity(struct gendisk *disk) -{ - return bdev_nr_sectors(disk->part0); -} - -static inline u64 sb_bdev_nr_blocks(struct super_block *sb) -{ - return bdev_nr_sectors(sb->s_bdev) >> - (sb->s_blocksize_bits - 
SECTOR_SHIFT); -} - -int bdev_disk_changed(struct gendisk *disk, bool invalidate); - -struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, - struct lock_class_key *lkclass); -extern void put_disk(struct gendisk *disk); -struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass); - -/** - * blk_alloc_disk - allocate a gendisk structure - * @node_id: numa node to allocate on - * - * Allocate and pre-initialize a gendisk structure for use with BIO based - * drivers. - * - * Context: can sleep - */ -#define blk_alloc_disk(node_id) \ -({ \ - static struct lock_class_key __key; \ - \ - __blk_alloc_disk(node_id, &__key); \ -}) -void blk_cleanup_disk(struct gendisk *disk); - -int __register_blkdev(unsigned int major, const char *name, - void (*probe)(dev_t devt)); -#define register_blkdev(major, name) \ - __register_blkdev(major, name, NULL) -void unregister_blkdev(unsigned int major, const char *name); - -bool bdev_check_media_change(struct block_device *bdev); -int __invalidate_device(struct block_device *bdev, bool kill_dirty); -void set_capacity(struct gendisk *disk, sector_t size); - -#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED -int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); -void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk); -int bd_register_pending_holders(struct gendisk *disk); -#else -static inline int bd_link_disk_holder(struct block_device *bdev, - struct gendisk *disk) -{ - return 0; -} -static inline void bd_unlink_disk_holder(struct block_device *bdev, - struct gendisk *disk) -{ -} -static inline int bd_register_pending_holders(struct gendisk *disk) -{ - return 0; -} -#endif /* CONFIG_BLOCK_HOLDER_DEPRECATED */ - -dev_t part_devt(struct gendisk *disk, u8 partno); -void inc_diskseq(struct gendisk *disk); -dev_t blk_lookup_devt(const char *name, int partno); -void blk_request_module(dev_t devt); -#ifdef CONFIG_BLOCK -void printk_all_partitions(void); -#else /* CONFIG_BLOCK */ 
-static inline void printk_all_partitions(void) -{ -} -#endif /* CONFIG_BLOCK */ - -#endif /* _LINUX_GENHD_H */ diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h index 6f7949b2fd8dc..abeba356bc3f5 100644 --- a/include/linux/part_stat.h +++ b/include/linux/part_stat.h @@ -2,7 +2,7 @@ #ifndef _LINUX_PART_STAT_H #define _LINUX_PART_STAT_H -#include +#include #include struct disk_stats { diff --git a/init/do_mounts.c b/init/do_mounts.c index 762b534978d95..7058e14ad5f70 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index e6af502c2fd77..a94044197c4a6 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git a/kernel/power/swap.c b/kernel/power/swap.c index ad10359030a4c..f1bd031295752 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index 320ca80aacab5..02882526ba9a3 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include -- GitLab From d5f68a42da7a4516e7503c281a54a58727f07dc3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:10:49 +0100 Subject: [PATCH 0181/1586] fs: remove mpage_alloc open code mpage_alloc in its two callers and simplify the results because of the context: - __mpage_writepage always passes GFP_NOFS and can thus always sleep and will never get a NULL return from bio_alloc at all. - do_mpage_readpage can only get a non-sleeping context for readahead which never sets PF_MEMALLOC and thus doesn't need the retry loop either. Both cases will never have __GFP_HIGH set.
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220124091107.642561-2-hch@lst.de Signed-off-by: Jens Axboe --- fs/mpage.c | 35 ++++++----------------------------- 1 file changed, 6 insertions(+), 29 deletions(-) diff --git a/fs/mpage.c b/fs/mpage.c index 87f5cfef6caa7..06e95d777e940 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -66,29 +66,6 @@ static struct bio *mpage_bio_submit(int op, int op_flags, struct bio *bio) return NULL; } -static struct bio * -mpage_alloc(struct block_device *bdev, - sector_t first_sector, int nr_vecs, - gfp_t gfp_flags) -{ - struct bio *bio; - - /* Restrict the given (page cache) mask for slab allocations */ - gfp_flags &= GFP_KERNEL; - bio = bio_alloc(gfp_flags, nr_vecs); - - if (bio == NULL && (current->flags & PF_MEMALLOC)) { - while (!bio && (nr_vecs /= 2)) - bio = bio_alloc(gfp_flags, nr_vecs); - } - - if (bio) { - bio_set_dev(bio, bdev); - bio->bi_iter.bi_sector = first_sector; - } - return bio; -} - /* * support function for mpage_readahead. The fs supplied get_block might * return an up to date buffer. 
This is used to map that buffer into @@ -296,10 +273,11 @@ alloc_new: page)) goto out; } - args->bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9), - bio_max_segs(args->nr_pages), gfp); + args->bio = bio_alloc(gfp, bio_max_segs(args->nr_pages)); if (args->bio == NULL) goto confused; + bio_set_dev(args->bio, bdev); + args->bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9); } length = first_hole << blkbits; @@ -608,10 +586,9 @@ alloc_new: page, wbc)) goto out; } - bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9), - BIO_MAX_VECS, GFP_NOFS|__GFP_HIGH); - if (bio == NULL) - goto confused; + bio = bio_alloc(GFP_NOFS, BIO_MAX_VECS); + bio_set_dev(bio, bdev); + bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9); wbc_init_bio(wbc, bio); bio->bi_write_hint = inode->i_write_hint; -- GitLab From f0d911927b3c7cf5f9edb5941d0287144a602d0d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:10:50 +0100 Subject: [PATCH 0182/1586] nilfs2: remove nilfs_alloc_seg_bio bio_alloc will never fail when it can sleep. Remove the now simple nilfs_alloc_seg_bio helper and open code it in the only caller. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220124091107.642561-3-hch@lst.de Signed-off-by: Jens Axboe --- fs/nilfs2/segbuf.c | 31 ++++--------------------------- 1 file changed, 4 insertions(+), 27 deletions(-) diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 43287b0d3e9b6..53b7c6d21cdd8 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -371,29 +371,6 @@ static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf, return err; } -/** - * nilfs_alloc_seg_bio - allocate a new bio for writing log - * @nilfs: nilfs object - * @start: start block number of the bio - * @nr_vecs: request size of page vector. - * - * Return Value: On success, pointer to the struct bio is returned. - * On error, NULL is returned. 
- */ -static struct bio *nilfs_alloc_seg_bio(struct the_nilfs *nilfs, sector_t start, - int nr_vecs) -{ - struct bio *bio; - - bio = bio_alloc(GFP_NOIO, nr_vecs); - if (likely(bio)) { - bio_set_dev(bio, nilfs->ns_bdev); - bio->bi_iter.bi_sector = - start << (nilfs->ns_blocksize_bits - 9); - } - return bio; -} - static void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf, struct nilfs_write_info *wi) { @@ -414,10 +391,10 @@ static int nilfs_segbuf_submit_bh(struct nilfs_segment_buffer *segbuf, BUG_ON(wi->nr_vecs <= 0); repeat: if (!wi->bio) { - wi->bio = nilfs_alloc_seg_bio(wi->nilfs, wi->blocknr + wi->end, - wi->nr_vecs); - if (unlikely(!wi->bio)) - return -ENOMEM; + wi->bio = bio_alloc(GFP_NOIO, wi->nr_vecs); + bio_set_dev(wi->bio, wi->nilfs->ns_bdev); + wi->bio->bi_iter.bi_sector = (wi->blocknr + wi->end) << + (wi->nilfs->ns_blocksize_bits - 9); } len = bio_add_page(wi->bio, bh->b_page, bh->b_size, bh_offset(bh)); -- GitLab From 5d2ca2132f889bc2c90d6d07fc9fc129cfee8955 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:10:51 +0100 Subject: [PATCH 0183/1586] nfs/blocklayout: remove bl_alloc_init_bio bio_alloc will never fail when it can sleep. Remove the now simple bl_alloc_init_bio helper and open code it in the only caller. 
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220124091107.642561-4-hch@lst.de Signed-off-by: Jens Axboe --- fs/nfs/blocklayout/blocklayout.c | 26 +++++--------------------- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index fe860c5387476..38e063af7e98a 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -115,23 +115,6 @@ bl_submit_bio(struct bio *bio) return NULL; } -static struct bio *bl_alloc_init_bio(unsigned int npg, - struct block_device *bdev, sector_t disk_sector, - bio_end_io_t end_io, struct parallel_io *par) -{ - struct bio *bio; - - npg = bio_max_segs(npg); - bio = bio_alloc(GFP_NOIO, npg); - if (bio) { - bio->bi_iter.bi_sector = disk_sector; - bio_set_dev(bio, bdev); - bio->bi_end_io = end_io; - bio->bi_private = par; - } - return bio; -} - static bool offset_in_map(u64 offset, struct pnfs_block_dev_map *map) { return offset >= map->start && offset < map->start + map->len; @@ -171,10 +154,11 @@ do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect, retry: if (!bio) { - bio = bl_alloc_init_bio(npg, map->bdev, - disk_addr >> SECTOR_SHIFT, end_io, par); - if (!bio) - return ERR_PTR(-ENOMEM); + bio = bio_alloc(GFP_NOIO, bio_max_segs(npg)); + bio->bi_iter.bi_sector = disk_addr >> SECTOR_SHIFT; + bio_set_dev(bio, map->bdev); + bio->bi_end_io = end_io; + bio->bi_private = par; bio_set_op_attrs(bio, rw, 0); } if (bio_add_page(bio, page, *len, offset) < *len) { -- GitLab From 39146b6f66ba5c107d5c5758a17f290846165b4d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:10:52 +0100 Subject: [PATCH 0184/1586] ntfs3: remove ntfs_alloc_bio bio_alloc will never fail if it is allowed to sleep, so there is no need for this loop. Also remove the __GFP_HIGH specifier as it doesn't make sense here given that we'll always fall back to the mempool anyway. 
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220124091107.642561-5-hch@lst.de Signed-off-by: Jens Axboe --- fs/ntfs3/fsntfs.c | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index 4de9acb169689..4a255e21ecf5f 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -1443,17 +1443,6 @@ int ntfs_write_bh(struct ntfs_sb_info *sbi, struct NTFS_RECORD_HEADER *rhdr, return err; } -static inline struct bio *ntfs_alloc_bio(u32 nr_vecs) -{ - struct bio *bio = bio_alloc(GFP_NOFS | __GFP_HIGH, nr_vecs); - - if (!bio && (current->flags & PF_MEMALLOC)) { - while (!bio && (nr_vecs /= 2)) - bio = bio_alloc(GFP_NOFS | __GFP_HIGH, nr_vecs); - } - return bio; -} - /* * ntfs_bio_pages - Read/write pages from/to disk. */ @@ -1496,11 +1485,7 @@ int ntfs_bio_pages(struct ntfs_sb_info *sbi, const struct runs_tree *run, lbo = ((u64)lcn << cluster_bits) + off; len = ((u64)clen << cluster_bits) - off; new_bio: - new = ntfs_alloc_bio(nr_pages - page_idx); - if (!new) { - err = -ENOMEM; - goto out; - } + new = bio_alloc(GFP_NOFS, nr_pages - page_idx); if (bio) { bio_chain(bio, new); submit_bio(bio); @@ -1599,11 +1584,7 @@ int ntfs_bio_fill_1(struct ntfs_sb_info *sbi, const struct runs_tree *run) lbo = (u64)lcn << cluster_bits; len = (u64)clen << cluster_bits; new_bio: - new = ntfs_alloc_bio(BIO_MAX_VECS); - if (!new) { - err = -ENOMEM; - break; - } + new = bio_alloc(GFP_NOFS, BIO_MAX_VECS); if (bio) { bio_chain(bio, new); submit_bio(bio); -- GitLab From 53db984e004c7116ce69e2f4a163664453336ae1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:10:53 +0100 Subject: [PATCH 0185/1586] dm: bio_alloc can't fail if it is allowed to sleep Remove handling of NULL returns from sleeping bio_alloc calls given that those can't fail. 
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220124091107.642561-6-hch@lst.de Signed-off-by: Jens Axboe --- drivers/md/dm-crypt.c | 5 +---- drivers/md/dm-log-writes.c | 18 ------------------ drivers/md/dm-thin.c | 25 +++++++++---------------- drivers/md/dm-zoned-metadata.c | 11 ----------- drivers/md/dm.c | 2 -- 5 files changed, 10 insertions(+), 51 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index d4ae31558826a..20abe3486aba1 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1673,9 +1673,6 @@ retry: mutex_lock(&cc->bio_alloc_lock); clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, &cc->bs); - if (!clone) - goto out; - clone_init(io, clone); remaining_size = size; @@ -1702,7 +1699,7 @@ retry: bio_put(clone); clone = NULL; } -out: + if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM)) mutex_unlock(&cc->bio_alloc_lock); diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index 139b09b06eda9..25f5e8d2d417b 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -218,10 +218,6 @@ static int write_metadata(struct log_writes_c *lc, void *entry, size_t ret; bio = bio_alloc(GFP_KERNEL, 1); - if (!bio) { - DMERR("Couldn't alloc log bio"); - goto error; - } bio->bi_iter.bi_size = 0; bio->bi_iter.bi_sector = sector; bio_set_dev(bio, lc->logdev->bdev); @@ -276,11 +272,6 @@ static int write_inline_data(struct log_writes_c *lc, void *entry, atomic_inc(&lc->io_blocks); bio = bio_alloc(GFP_KERNEL, bio_pages); - if (!bio) { - DMERR("Couldn't alloc inline data bio"); - goto error; - } - bio->bi_iter.bi_size = 0; bio->bi_iter.bi_sector = sector; bio_set_dev(bio, lc->logdev->bdev); @@ -322,7 +313,6 @@ static int write_inline_data(struct log_writes_c *lc, void *entry, error_bio: bio_free_pages(bio); bio_put(bio); -error: put_io_block(lc); return -1; } @@ -364,10 +354,6 @@ static int log_one_block(struct log_writes_c *lc, atomic_inc(&lc->io_blocks); bio = bio_alloc(GFP_KERNEL, 
bio_max_segs(block->vec_cnt)); - if (!bio) { - DMERR("Couldn't alloc log bio"); - goto error; - } bio->bi_iter.bi_size = 0; bio->bi_iter.bi_sector = sector; bio_set_dev(bio, lc->logdev->bdev); @@ -387,10 +373,6 @@ static int log_one_block(struct log_writes_c *lc, submit_bio(bio); bio = bio_alloc(GFP_KERNEL, bio_max_segs(block->vec_cnt - i)); - if (!bio) { - DMERR("Couldn't alloc log bio"); - goto error; - } bio->bi_iter.bi_size = 0; bio->bi_iter.bi_sector = sector; bio_set_dev(bio, lc->logdev->bdev); diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index ec119d2422d5d..76a9c2e9aeeea 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1180,24 +1180,17 @@ static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m) } discard_parent = bio_alloc(GFP_NOIO, 1); - if (!discard_parent) { - DMWARN("%s: unable to allocate top level discard bio for passdown. Skipping passdown.", - dm_device_name(tc->pool->pool_md)); - queue_passdown_pt2(m); + discard_parent->bi_end_io = passdown_endio; + discard_parent->bi_private = m; - } else { - discard_parent->bi_end_io = passdown_endio; - discard_parent->bi_private = m; - - if (m->maybe_shared) - passdown_double_checking_shared_status(m, discard_parent); - else { - struct discard_op op; + if (m->maybe_shared) + passdown_double_checking_shared_status(m, discard_parent); + else { + struct discard_op op; - begin_discard(&op, tc, discard_parent); - r = issue_discard(&op, m->data_block, data_end); - end_discard(&op, r); - } + begin_discard(&op, tc, discard_parent); + r = issue_discard(&op, m->data_block, data_end); + end_discard(&op, r); } } diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index ee4626d085574..5718b83cc7182 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -551,10 +551,6 @@ static struct dmz_mblock *dmz_get_mblock_slow(struct dmz_metadata *zmd, return ERR_PTR(-ENOMEM); bio = bio_alloc(GFP_NOIO, 1); - if (!bio) { - 
dmz_free_mblock(zmd, mblk); - return ERR_PTR(-ENOMEM); - } spin_lock(&zmd->mblk_lock); @@ -726,10 +722,6 @@ static int dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk, return -EIO; bio = bio_alloc(GFP_NOIO, 1); - if (!bio) { - set_bit(DMZ_META_ERROR, &mblk->state); - return -ENOMEM; - } set_bit(DMZ_META_WRITING, &mblk->state); @@ -760,9 +752,6 @@ static int dmz_rdwr_block(struct dmz_dev *dev, int op, return -EIO; bio = bio_alloc(GFP_NOIO, 1); - if (!bio) - return -ENOMEM; - bio->bi_iter.bi_sector = dmz_blk2sect(block); bio_set_dev(bio, dev->bdev); bio_set_op_attrs(bio, op, REQ_SYNC | REQ_META | REQ_PRIO); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index dcbd6d201619d..e431f72c10bf4 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -520,8 +520,6 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) struct bio *clone; clone = bio_alloc_bioset(GFP_NOIO, 0, &md->io_bs); - if (!clone) - return NULL; tio = container_of(clone, struct dm_target_io, clone); tio->inside_dm_io = true; -- GitLab From 3f868c09ea8f40f800c4c644c072d91c9eee0d71 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:10:54 +0100 Subject: [PATCH 0186/1586] dm-crypt: remove clone_init Just open code it next to the bio allocations, which saves a few lines of code, prepares for future changes and allows to remove the duplicate bi_opf assignment for the bio_clone_fast case in kcryptd_io_read. 
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220124091107.642561-7-hch@lst.de Signed-off-by: Jens Axboe --- drivers/md/dm-crypt.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 20abe3486aba1..3c5ecd35d3483 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -234,7 +234,7 @@ static volatile unsigned long dm_crypt_pages_per_client; #define DM_CRYPT_MEMORY_PERCENT 2 #define DM_CRYPT_MIN_PAGES_PER_CLIENT (BIO_MAX_VECS * 16) -static void clone_init(struct dm_crypt_io *, struct bio *); +static void crypt_endio(struct bio *clone); static void kcryptd_queue_crypt(struct dm_crypt_io *io); static struct scatterlist *crypt_get_sg_data(struct crypt_config *cc, struct scatterlist *sg); @@ -1673,7 +1673,10 @@ retry: mutex_lock(&cc->bio_alloc_lock); clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, &cc->bs); - clone_init(io, clone); + clone->bi_private = io; + clone->bi_end_io = crypt_endio; + bio_set_dev(clone, cc->dev->bdev); + clone->bi_opf = io->base_bio->bi_opf; remaining_size = size; @@ -1826,16 +1829,6 @@ static void crypt_endio(struct bio *clone) crypt_dec_pending(io); } -static void clone_init(struct dm_crypt_io *io, struct bio *clone) -{ - struct crypt_config *cc = io->cc; - - clone->bi_private = io; - clone->bi_end_io = crypt_endio; - bio_set_dev(clone, cc->dev->bdev); - clone->bi_opf = io->base_bio->bi_opf; -} - static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) { struct crypt_config *cc = io->cc; @@ -1850,10 +1843,12 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) clone = bio_clone_fast(io->base_bio, gfp, &cc->bs); if (!clone) return 1; + clone->bi_private = io; + clone->bi_end_io = crypt_endio; + bio_set_dev(clone, cc->dev->bdev); crypt_inc_pending(io); - clone_init(io, clone); clone->bi_iter.bi_sector = cc->start + io->sector; if (dm_crypt_integrity_io_alloc(io, clone)) { -- GitLab From 
eba33b8ef1b90d8996eceb0569c06a4f784ef2b5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:10:55 +0100 Subject: [PATCH 0187/1586] dm-snap: use blkdev_issue_flush instead of open coding it Use blkdev_issue_flush, which uses an on-stack bio instead of an opencoded version with a bio embedded into struct dm_snapshot. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220124091107.642561-8-hch@lst.de Signed-off-by: Jens Axboe --- drivers/md/dm-snap.c | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index dcf34c6b05ad3..0d336b5ec5714 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -141,11 +141,6 @@ struct dm_snapshot { * for them to be committed. */ struct bio_list bios_queued_during_merge; - - /* - * Flush data after merge. - */ - struct bio flush_bio; }; /* @@ -1127,17 +1122,6 @@ shut: static void error_bios(struct bio *bio); -static int flush_data(struct dm_snapshot *s) -{ - struct bio *flush_bio = &s->flush_bio; - - bio_reset(flush_bio); - bio_set_dev(flush_bio, s->origin->bdev); - flush_bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; - - return submit_bio_wait(flush_bio); -} - static void merge_callback(int read_err, unsigned long write_err, void *context) { struct dm_snapshot *s = context; @@ -1151,7 +1135,7 @@ static void merge_callback(int read_err, unsigned long write_err, void *context) goto shut; } - if (flush_data(s) < 0) { + if (blkdev_issue_flush(s->origin->bdev) < 0) { DMERR("Flush after merge failed: shutting down merge"); goto shut; } @@ -1340,7 +1324,6 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) s->first_merging_chunk = 0; s->num_merging_chunks = 0; bio_list_init(&s->bios_queued_during_merge); - bio_init(&s->flush_bio, NULL, 0); /* Allocate hash table for COW data */ if (init_hash_tables(s)) { @@ -1528,8 +1511,6 @@ static void snapshot_dtr(struct dm_target *ti) 
dm_exception_store_destroy(s->store); - bio_uninit(&s->flush_bio); - dm_put_device(ti, s->cow); dm_put_device(ti, s->origin); -- GitLab From 28d7d128aad5cd2178b158900d58365d1fd3de94 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:10:56 +0100 Subject: [PATCH 0188/1586] dm-thin: use blkdev_issue_flush instead of open coding it Use blkdev_issue_flush, which uses an on-stack bio instead of an opencoded version with a bio embedded into struct pool. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220124091107.642561-9-hch@lst.de Signed-off-by: Jens Axboe --- drivers/md/dm-thin.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 76a9c2e9aeeea..411a3f56ed90c 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -282,8 +282,6 @@ struct pool { struct dm_bio_prison_cell **cell_sort_array; mempool_t mapping_pool; - - struct bio flush_bio; }; static void metadata_operation_failed(struct pool *pool, const char *op, int r); @@ -2906,7 +2904,6 @@ static void __pool_destroy(struct pool *pool) if (pool->next_mapping) mempool_free(pool->next_mapping, &pool->mapping_pool); mempool_exit(&pool->mapping_pool); - bio_uninit(&pool->flush_bio); dm_deferred_set_destroy(pool->shared_read_ds); dm_deferred_set_destroy(pool->all_io_ds); kfree(pool); @@ -2987,7 +2984,6 @@ static struct pool *pool_create(struct mapped_device *pool_md, pool->low_water_triggered = false; pool->suspended = true; pool->out_of_data_space = false; - bio_init(&pool->flush_bio, NULL, 0); pool->shared_read_ds = dm_deferred_set_create(); if (!pool->shared_read_ds) { @@ -3194,13 +3190,8 @@ static void metadata_low_callback(void *context) static int metadata_pre_commit_callback(void *context) { struct pool *pool = context; - struct bio *flush_bio = &pool->flush_bio; - - bio_reset(flush_bio); - bio_set_dev(flush_bio, pool->data_dev); - flush_bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; - return 
submit_bio_wait(flush_bio); + return blkdev_issue_flush(pool->data_dev); } static sector_t get_dev_size(struct block_device *bdev) -- GitLab From 4b1dc86d1857f1007865cab759f2285280692eee Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:10:57 +0100 Subject: [PATCH 0189/1586] drbd: bio_alloc can't fail if it is allowed to sleep Remove handling of NULL returns from sleeping bio_alloc calls given that those can't fail. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220124091107.642561-10-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_receiver.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 6df2539e215ba..fb59b263deeef 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1281,14 +1281,13 @@ static void submit_one_flush(struct drbd_device *device, struct issue_flush_cont { struct bio *bio = bio_alloc(GFP_NOIO, 0); struct one_flush_context *octx = kmalloc(sizeof(*octx), GFP_NOIO); - if (!bio || !octx) { - drbd_warn(device, "Could not allocate a bio, CANNOT ISSUE FLUSH\n"); + + if (!octx) { + drbd_warn(device, "Could not allocate a octx, CANNOT ISSUE FLUSH\n"); /* FIXME: what else can I do now? disconnecting or detaching * really does not help to improve the state of the world, either. */ - kfree(octx); - if (bio) - bio_put(bio); + bio_put(bio); ctx->error = -ENOMEM; put_ldev(device); @@ -1646,7 +1645,6 @@ int drbd_submit_peer_request(struct drbd_device *device, unsigned data_size = peer_req->i.size; unsigned n_bios = 0; unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT; - int err = -ENOMEM; /* TRIM/DISCARD: for now, always use the helper function * blkdev_issue_zeroout(..., discard=true).
@@ -1688,10 +1686,6 @@ int drbd_submit_peer_request(struct drbd_device *device, */ next_bio: bio = bio_alloc(GFP_NOIO, nr_pages); - if (!bio) { - drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages); - goto fail; - } /* > peer_req->i.sector, unless this is the first bio */ bio->bi_iter.bi_sector = sector; bio_set_dev(bio, device->ldev->backing_bdev); @@ -1726,14 +1720,6 @@ next_bio: drbd_submit_bio_noacct(device, fault_type, bio); } while (bios); return 0; - -fail: - while (bios) { - bio = bios; - bios = bios->bi_next; - bio_put(bio); - } - return err; } static void drbd_remove_epoch_entry_interval(struct drbd_device *device, -- GitLab From 1fe0640ff94feae6d21417e2f4f2829b882274b1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:10:58 +0100 Subject: [PATCH 0190/1586] rnbd-srv: simplify bio mapping in process_rdma The memory mapped in process_rdma is contiguous, so there is no need to loop over bio_add_page. Remove rnbd_bio_map_kern and just open code the bio allocation and mapping in the caller. 
Signed-off-by: Christoph Hellwig Reviewed-by: Jack Wang Tested-by: Jack Wang Link: https://lore.kernel.org/r/20220124091107.642561-11-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/rnbd/rnbd-srv-dev.c | 57 ------------------------------- drivers/block/rnbd/rnbd-srv-dev.h | 5 --- drivers/block/rnbd/rnbd-srv.c | 23 +++++++++---- 3 files changed, 16 insertions(+), 69 deletions(-) diff --git a/drivers/block/rnbd/rnbd-srv-dev.c b/drivers/block/rnbd/rnbd-srv-dev.c index b241a099aeae2..98d3e591a0885 100644 --- a/drivers/block/rnbd/rnbd-srv-dev.c +++ b/drivers/block/rnbd/rnbd-srv-dev.c @@ -44,60 +44,3 @@ void rnbd_dev_close(struct rnbd_dev *dev) blkdev_put(dev->bdev, dev->blk_open_flags); kfree(dev); } - -void rnbd_dev_bi_end_io(struct bio *bio) -{ - struct rnbd_dev_blk_io *io = bio->bi_private; - - rnbd_endio(io->priv, blk_status_to_errno(bio->bi_status)); - bio_put(bio); -} - -/** - * rnbd_bio_map_kern - map kernel address into bio - * @data: pointer to buffer to map - * @bs: bio_set to use. - * @len: length in bytes - * @gfp_mask: allocation flags for bio allocation - * - * Map the kernel address into a bio suitable for io to a block - * device. Returns an error pointer in case of error. 
- */ -struct bio *rnbd_bio_map_kern(void *data, struct bio_set *bs, - unsigned int len, gfp_t gfp_mask) -{ - unsigned long kaddr = (unsigned long)data; - unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; - unsigned long start = kaddr >> PAGE_SHIFT; - const int nr_pages = end - start; - int offset, i; - struct bio *bio; - - bio = bio_alloc_bioset(gfp_mask, nr_pages, bs); - if (!bio) - return ERR_PTR(-ENOMEM); - - offset = offset_in_page(kaddr); - for (i = 0; i < nr_pages; i++) { - unsigned int bytes = PAGE_SIZE - offset; - - if (len <= 0) - break; - - if (bytes > len) - bytes = len; - - if (bio_add_page(bio, virt_to_page(data), bytes, - offset) < bytes) { - /* we don't support partial mappings */ - bio_put(bio); - return ERR_PTR(-EINVAL); - } - - data += bytes; - len -= bytes; - offset = 0; - } - - return bio; -} diff --git a/drivers/block/rnbd/rnbd-srv-dev.h b/drivers/block/rnbd/rnbd-srv-dev.h index 0eb23850afb95..1a14ece0be726 100644 --- a/drivers/block/rnbd/rnbd-srv-dev.h +++ b/drivers/block/rnbd/rnbd-srv-dev.h @@ -41,11 +41,6 @@ void rnbd_dev_close(struct rnbd_dev *dev); void rnbd_endio(void *priv, int error); -void rnbd_dev_bi_end_io(struct bio *bio); - -struct bio *rnbd_bio_map_kern(void *data, struct bio_set *bs, - unsigned int len, gfp_t gfp_mask); - static inline int rnbd_dev_get_max_segs(const struct rnbd_dev *dev) { return queue_max_segments(bdev_get_queue(dev->bdev)); diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c index 1ee808fc600cf..6d228af1dcc35 100644 --- a/drivers/block/rnbd/rnbd-srv.c +++ b/drivers/block/rnbd/rnbd-srv.c @@ -114,6 +114,14 @@ rnbd_get_sess_dev(int dev_id, struct rnbd_srv_session *srv_sess) return sess_dev; } +static void rnbd_dev_bi_end_io(struct bio *bio) +{ + struct rnbd_dev_blk_io *io = bio->bi_private; + + rnbd_endio(io->priv, blk_status_to_errno(bio->bi_status)); + bio_put(bio); +} + static int process_rdma(struct rnbd_srv_session *srv_sess, struct rtrs_srv_op *id, void *data, u32 
datalen, const void *usr, size_t usrlen) @@ -144,12 +152,12 @@ static int process_rdma(struct rnbd_srv_session *srv_sess, priv->sess_dev = sess_dev; priv->id = id; - /* Generate bio with pages pointing to the rdma buffer */ - bio = rnbd_bio_map_kern(data, sess_dev->rnbd_dev->ibd_bio_set, datalen, GFP_KERNEL); - if (IS_ERR(bio)) { - err = PTR_ERR(bio); - rnbd_srv_err(sess_dev, "Failed to generate bio, err: %d\n", err); - goto sess_dev_put; + bio = bio_alloc_bioset(GFP_KERNEL, 1, sess_dev->rnbd_dev->ibd_bio_set); + if (bio_add_page(bio, virt_to_page(data), datalen, + offset_in_page(data)) != datalen) { + rnbd_srv_err(sess_dev, "Failed to map data to bio\n"); + err = -EINVAL; + goto bio_put; } io = container_of(bio, struct rnbd_dev_blk_io, bio); @@ -170,7 +178,8 @@ static int process_rdma(struct rnbd_srv_session *srv_sess, return 0; -sess_dev_put: +bio_put: + bio_put(bio); rnbd_put_sess_dev(sess_dev); err: kfree(priv); -- GitLab From d7b78de2b1552e3e7ce3a069f075cc2729aa5c34 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:10:59 +0100 Subject: [PATCH 0191/1586] rnbd-srv: remove struct rnbd_dev_blk_io Only the priv field of rnbd_dev_blk_io is used, so store the value of that in bio->bi_private directly and remove the entire bio_set overhead. 
Signed-off-by: Christoph Hellwig Reviewed-by: Jack Wang Link: https://lore.kernel.org/r/20220124091107.642561-12-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/rnbd/rnbd-srv-dev.c | 4 +--- drivers/block/rnbd/rnbd-srv-dev.h | 13 ++----------- drivers/block/rnbd/rnbd-srv.c | 27 ++++----------------------- drivers/block/rnbd/rnbd-srv.h | 1 - 4 files changed, 7 insertions(+), 38 deletions(-) diff --git a/drivers/block/rnbd/rnbd-srv-dev.c b/drivers/block/rnbd/rnbd-srv-dev.c index 98d3e591a0885..c5d0a03911659 100644 --- a/drivers/block/rnbd/rnbd-srv-dev.c +++ b/drivers/block/rnbd/rnbd-srv-dev.c @@ -12,8 +12,7 @@ #include "rnbd-srv-dev.h" #include "rnbd-log.h" -struct rnbd_dev *rnbd_dev_open(const char *path, fmode_t flags, - struct bio_set *bs) +struct rnbd_dev *rnbd_dev_open(const char *path, fmode_t flags) { struct rnbd_dev *dev; int ret; @@ -30,7 +29,6 @@ struct rnbd_dev *rnbd_dev_open(const char *path, fmode_t flags, dev->blk_open_flags = flags; bdevname(dev->bdev, dev->name); - dev->ibd_bio_set = bs; return dev; diff --git a/drivers/block/rnbd/rnbd-srv-dev.h b/drivers/block/rnbd/rnbd-srv-dev.h index 1a14ece0be726..2c3df02b5e8ec 100644 --- a/drivers/block/rnbd/rnbd-srv-dev.h +++ b/drivers/block/rnbd/rnbd-srv-dev.h @@ -14,25 +14,16 @@ struct rnbd_dev { struct block_device *bdev; - struct bio_set *ibd_bio_set; fmode_t blk_open_flags; char name[BDEVNAME_SIZE]; }; -struct rnbd_dev_blk_io { - struct rnbd_dev *dev; - void *priv; - /* have to be last member for front_pad usage of bioset_init */ - struct bio bio; -}; - /** * rnbd_dev_open() - Open a device + * @path: path to open * @flags: open flags - * @bs: bio_set to use during block io, */ -struct rnbd_dev *rnbd_dev_open(const char *path, fmode_t flags, - struct bio_set *bs); +struct rnbd_dev *rnbd_dev_open(const char *path, fmode_t flags); /** * rnbd_dev_close() - Close a device diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c index 6d228af1dcc35..ff9b389976078 100644 --- 
a/drivers/block/rnbd/rnbd-srv.c +++ b/drivers/block/rnbd/rnbd-srv.c @@ -116,9 +116,7 @@ rnbd_get_sess_dev(int dev_id, struct rnbd_srv_session *srv_sess) static void rnbd_dev_bi_end_io(struct bio *bio) { - struct rnbd_dev_blk_io *io = bio->bi_private; - - rnbd_endio(io->priv, blk_status_to_errno(bio->bi_status)); + rnbd_endio(bio->bi_private, blk_status_to_errno(bio->bi_status)); bio_put(bio); } @@ -131,7 +129,6 @@ static int process_rdma(struct rnbd_srv_session *srv_sess, struct rnbd_srv_sess_dev *sess_dev; u32 dev_id; int err; - struct rnbd_dev_blk_io *io; struct bio *bio; short prio; @@ -152,7 +149,7 @@ static int process_rdma(struct rnbd_srv_session *srv_sess, priv->sess_dev = sess_dev; priv->id = id; - bio = bio_alloc_bioset(GFP_KERNEL, 1, sess_dev->rnbd_dev->ibd_bio_set); + bio = bio_alloc(GFP_KERNEL, 1); if (bio_add_page(bio, virt_to_page(data), datalen, offset_in_page(data)) != datalen) { rnbd_srv_err(sess_dev, "Failed to map data to bio\n"); @@ -160,12 +157,8 @@ static int process_rdma(struct rnbd_srv_session *srv_sess, goto bio_put; } - io = container_of(bio, struct rnbd_dev_blk_io, bio); - io->dev = sess_dev->rnbd_dev; - io->priv = priv; - bio->bi_end_io = rnbd_dev_bi_end_io; - bio->bi_private = io; + bio->bi_private = priv; bio->bi_opf = rnbd_to_bio_flags(le32_to_cpu(msg->rw)); bio->bi_iter.bi_sector = le64_to_cpu(msg->sector); bio->bi_iter.bi_size = le32_to_cpu(msg->bi_size); @@ -260,7 +253,6 @@ static void destroy_sess(struct rnbd_srv_session *srv_sess) out: xa_destroy(&srv_sess->index_idr); - bioset_exit(&srv_sess->sess_bio_set); pr_info("RTRS Session %s disconnected\n", srv_sess->sessname); @@ -289,16 +281,6 @@ static int create_sess(struct rtrs_srv_sess *rtrs) return -ENOMEM; srv_sess->queue_depth = rtrs_srv_get_queue_depth(rtrs); - err = bioset_init(&srv_sess->sess_bio_set, srv_sess->queue_depth, - offsetof(struct rnbd_dev_blk_io, bio), - BIOSET_NEED_BVECS); - if (err) { - pr_err("Allocating srv_session for path %s failed\n", - pathname); - 
kfree(srv_sess); - return err; - } - xa_init_flags(&srv_sess->index_idr, XA_FLAGS_ALLOC); INIT_LIST_HEAD(&srv_sess->sess_dev_list); mutex_init(&srv_sess->lock); @@ -747,8 +729,7 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess, goto reject; } - rnbd_dev = rnbd_dev_open(full_path, open_flags, - &srv_sess->sess_bio_set); + rnbd_dev = rnbd_dev_open(full_path, open_flags); if (IS_ERR(rnbd_dev)) { pr_err("Opening device '%s' on session %s failed, failed to open the block device, err: %ld\n", full_path, srv_sess->sessname, PTR_ERR(rnbd_dev)); diff --git a/drivers/block/rnbd/rnbd-srv.h b/drivers/block/rnbd/rnbd-srv.h index e5604bce123ab..be2ae486d407e 100644 --- a/drivers/block/rnbd/rnbd-srv.h +++ b/drivers/block/rnbd/rnbd-srv.h @@ -23,7 +23,6 @@ struct rnbd_srv_session { struct rtrs_srv_sess *rtrs; char sessname[NAME_MAX]; int queue_depth; - struct bio_set sess_bio_set; struct xarray index_idr; /* List of struct rnbd_srv_sess_dev */ -- GitLab From 7d8d0c658d48705fca35238a8ff601b7c5cbc0de Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:11:00 +0100 Subject: [PATCH 0192/1586] xen-blkback: bio_alloc can't fail if it is allowed to sleep Remove handling of NULL returns from sleeping bio_alloc calls given that those can't fail.
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220124091107.642561-13-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/xen-blkback/blkback.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 14e452896d04c..6bb2ad7692065 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -1327,9 +1327,6 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, seg[i].nsec << 9, seg[i].offset) == 0)) { bio = bio_alloc(GFP_KERNEL, bio_max_segs(nseg - i)); - if (unlikely(bio == NULL)) - goto fail_put_bio; - biolist[nbio++] = bio; bio_set_dev(bio, preq.bdev); bio->bi_private = pending_req; @@ -1346,9 +1343,6 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, BUG_ON(operation_flags != REQ_PREFLUSH); bio = bio_alloc(GFP_KERNEL, 0); - if (unlikely(bio == NULL)) - goto fail_put_bio; - biolist[nbio++] = bio; bio_set_dev(bio, preq.bdev); bio->bi_private = pending_req; @@ -1381,14 +1375,6 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, free_req(ring, pending_req); msleep(1); /* back off a bit */ return -EIO; - - fail_put_bio: - for (i = 0; i < nbio; i++) - bio_put(biolist[i]); - atomic_set(&pending_req->pendcnt, 1); - __end_block_io_op(pending_req, BLK_STS_RESOURCE); - msleep(1); /* back off a bit */ - return -EIO; } -- GitLab From 3b005bf6acf009abd700e2c652c86e5c209cf63d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:11:01 +0100 Subject: [PATCH 0193/1586] block: move blk_next_bio to bio.c Keep blk_next_bio next to the core bio infrastructure. 
Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220124091107.642561-14-hch@lst.de Signed-off-by: Jens Axboe --- block/bio.c | 13 +++++++++++++ block/blk-lib.c | 13 ------------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/block/bio.c b/block/bio.c index 4312a8085396b..1536579ed490a 100644 --- a/block/bio.c +++ b/block/bio.c @@ -344,6 +344,19 @@ void bio_chain(struct bio *bio, struct bio *parent) } EXPORT_SYMBOL(bio_chain); +struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp) +{ + struct bio *new = bio_alloc(gfp, nr_pages); + + if (bio) { + bio_chain(bio, new); + submit_bio(bio); + } + + return new; +} +EXPORT_SYMBOL_GPL(blk_next_bio); + static void bio_alloc_rescue(struct work_struct *work) { struct bio_set *bs = container_of(work, struct bio_set, rescue_work); diff --git a/block/blk-lib.c b/block/blk-lib.c index 9f09beadcbe30..9245b300ef73e 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -10,19 +10,6 @@ #include "blk.h" -struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp) -{ - struct bio *new = bio_alloc(gfp, nr_pages); - - if (bio) { - bio_chain(bio, new); - submit_bio(bio); - } - - return new; -} -EXPORT_SYMBOL_GPL(blk_next_bio); - int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, int flags, struct bio **biop) -- GitLab From 0a3140ea0fae377c9eaa031b7db1670ae422ed47 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 24 Jan 2022 10:11:02 +0100 Subject: [PATCH 0194/1586] block: pass a block_device and opf to blk_next_bio All callers need to set the block_device and operation, so lift that into the common code. 
Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220124091107.642561-15-hch@lst.de Signed-off-by: Jens Axboe --- block/bio.c | 6 +++++- block/blk-lib.c | 19 +++++-------------- block/blk-zoned.c | 9 +++------ block/blk.h | 2 -- drivers/nvme/target/zns.c | 6 +++--- include/linux/bio.h | 3 ++- 6 files changed, 18 insertions(+), 27 deletions(-) diff --git a/block/bio.c b/block/bio.c index 1536579ed490a..a0166f29a05c3 100644 --- a/block/bio.c +++ b/block/bio.c @@ -344,10 +344,14 @@ void bio_chain(struct bio *bio, struct bio *parent) } EXPORT_SYMBOL(bio_chain); -struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp) +struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev, + unsigned int nr_pages, unsigned int opf, gfp_t gfp) { struct bio *new = bio_alloc(gfp, nr_pages); + bio_set_dev(new, bdev); + new->bi_opf = opf; + if (bio) { bio_chain(bio, new); submit_bio(bio); diff --git a/block/blk-lib.c b/block/blk-lib.c index 9245b300ef73e..1b8ced45e4e55 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -82,11 +82,8 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, WARN_ON_ONCE((req_sects << 9) > UINT_MAX); - bio = blk_next_bio(bio, 0, gfp_mask); + bio = blk_next_bio(bio, bdev, 0, op, gfp_mask); bio->bi_iter.bi_sector = sector; - bio_set_dev(bio, bdev); - bio_set_op_attrs(bio, op, 0); - bio->bi_iter.bi_size = req_sects << 9; sector += req_sects; nr_sects -= req_sects; @@ -176,14 +173,12 @@ static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector, max_write_same_sectors = bio_allowed_max_sectors(q); while (nr_sects) { - bio = blk_next_bio(bio, 1, gfp_mask); + bio = blk_next_bio(bio, bdev, 1, REQ_OP_WRITE_SAME, gfp_mask); bio->bi_iter.bi_sector = sector; - bio_set_dev(bio, bdev); bio->bi_vcnt = 1; bio->bi_io_vec->bv_page = page; bio->bi_io_vec->bv_offset = 0; bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev); - bio_set_op_attrs(bio, 
REQ_OP_WRITE_SAME, 0); if (nr_sects > max_write_same_sectors) { bio->bi_iter.bi_size = max_write_same_sectors << 9; @@ -252,10 +247,8 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev, return -EOPNOTSUPP; while (nr_sects) { - bio = blk_next_bio(bio, 0, gfp_mask); + bio = blk_next_bio(bio, bdev, 0, REQ_OP_WRITE_ZEROES, gfp_mask); bio->bi_iter.bi_sector = sector; - bio_set_dev(bio, bdev); - bio->bi_opf = REQ_OP_WRITE_ZEROES; if (flags & BLKDEV_ZERO_NOUNMAP) bio->bi_opf |= REQ_NOUNMAP; @@ -303,11 +296,9 @@ static int __blkdev_issue_zero_pages(struct block_device *bdev, return -EPERM; while (nr_sects != 0) { - bio = blk_next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects), - gfp_mask); + bio = blk_next_bio(bio, bdev, __blkdev_sectors_to_bio_pages(nr_sects), + REQ_OP_WRITE, gfp_mask); bio->bi_iter.bi_sector = sector; - bio_set_dev(bio, bdev); - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); while (nr_sects != 0) { sz = min((sector_t) PAGE_SIZE, nr_sects << 9); diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 774ecc598bee2..5ab755d792c81 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -215,9 +215,8 @@ static int blkdev_zone_reset_all_emulated(struct block_device *bdev, continue; } - bio = blk_next_bio(bio, 0, gfp_mask); - bio_set_dev(bio, bdev); - bio->bi_opf = REQ_OP_ZONE_RESET | REQ_SYNC; + bio = blk_next_bio(bio, bdev, 0, REQ_OP_ZONE_RESET | REQ_SYNC, + gfp_mask); bio->bi_iter.bi_sector = sector; sector += zone_sectors; @@ -306,9 +305,7 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, } while (sector < end_sector) { - bio = blk_next_bio(bio, 0, gfp_mask); - bio_set_dev(bio, bdev); - bio->bi_opf = op | REQ_SYNC; + bio = blk_next_bio(bio, bdev, 0, op | REQ_SYNC, gfp_mask); bio->bi_iter.bi_sector = sector; sector += zone_sectors; diff --git a/block/blk.h b/block/blk.h index 800c5ae387a0b..abb663a2a147b 100644 --- a/block/blk.h +++ b/block/blk.h @@ -406,8 +406,6 @@ extern int blk_iolatency_init(struct request_queue *q); 
static inline int blk_iolatency_init(struct request_queue *q) { return 0; } #endif -struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp); - #ifdef CONFIG_BLK_DEV_ZONED void blk_queue_free_zone_bitmaps(struct request_queue *q); void blk_queue_clear_zone_settings(struct request_queue *q); diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c index 46bc30fe85d2b..247de74247fab 100644 --- a/drivers/nvme/target/zns.c +++ b/drivers/nvme/target/zns.c @@ -412,10 +412,10 @@ static u16 nvmet_bdev_zone_mgmt_emulate_all(struct nvmet_req *req) while (sector < get_capacity(bdev->bd_disk)) { if (test_bit(blk_queue_zone_no(q, sector), d.zbitmap)) { - bio = blk_next_bio(bio, 0, GFP_KERNEL); - bio->bi_opf = zsa_req_op(req->cmd->zms.zsa) | REQ_SYNC; + bio = blk_next_bio(bio, bdev, 0, + zsa_req_op(req->cmd->zms.zsa) | REQ_SYNC, + GFP_KERNEL); bio->bi_iter.bi_sector = sector; - bio_set_dev(bio, bdev); /* This may take a while, so be nice to others */ cond_resched(); } diff --git a/include/linux/bio.h b/include/linux/bio.h index 117d7f248ac96..edeae54074ede 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -790,6 +790,7 @@ static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb) bio->bi_opf |= REQ_NOWAIT; } -struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp); +struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev, + unsigned int nr_pages, unsigned int opf, gfp_t gfp); #endif /* __LINUX_BIO_H */ -- GitLab From 609be1066731fea86436f5f91022f82e592ab456 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:11:03 +0100 Subject: [PATCH 0195/1586] block: pass a block_device and opf to bio_alloc_bioset Pass the block_device and operation that we plan to use this bio for to bio_alloc_bioset to optimize the assignment. NULL/0 can be passed, both for the passthrough case on a raw request_queue and to temporarily avoid refactoring some nasty code.
Also move the gfp_mask argument after the nr_vecs argument for a much more logical calling convention matching what most of the kernel does. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220124091107.642561-16-hch@lst.de Signed-off-by: Jens Axboe --- block/bio.c | 30 +++++++++++++++++------------ block/bounce.c | 6 ++---- drivers/block/drbd/drbd_actlog.c | 5 ++--- drivers/block/drbd/drbd_bitmap.c | 7 +++---- drivers/md/bcache/request.c | 12 +++++------- drivers/md/dm-crypt.c | 5 ++--- drivers/md/dm-io.c | 5 ++--- drivers/md/dm-writecache.c | 7 ++++--- drivers/md/dm.c | 5 +++-- drivers/md/md.c | 16 +++++++-------- drivers/md/raid1.c | 3 ++- drivers/md/raid10.c | 6 ++---- drivers/md/raid5-cache.c | 8 ++++---- drivers/md/raid5-ppl.c | 11 +++++------ drivers/target/target_core_iblock.c | 6 ++---- fs/btrfs/extent_io.c | 2 +- fs/f2fs/data.c | 7 +++---- fs/iomap/buffered-io.c | 6 +++--- include/linux/bio.h | 7 ++++--- 19 files changed, 75 insertions(+), 79 deletions(-) diff --git a/block/bio.c b/block/bio.c index a0166f29a05c3..9afc0c2aca6e4 100644 --- a/block/bio.c +++ b/block/bio.c @@ -417,8 +417,10 @@ static void punt_bios_to_rescuer(struct bio_set *bs) /** * bio_alloc_bioset - allocate a bio for I/O + * @bdev: block device to allocate the bio for (can be %NULL) + * @nr_vecs: number of bvecs to pre-allocate + * @opf: operation and flags for bio * @gfp_mask: the GFP_* mask given to the slab allocator - * @nr_iovecs: number of iovecs to pre-allocate * @bs: the bio_set to allocate from. * * Allocate a bio from the mempools in @bs. @@ -447,15 +449,16 @@ static void punt_bios_to_rescuer(struct bio_set *bs) * * Returns: Pointer to new bio on success, NULL on failure. 
*/ -struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned short nr_iovecs, +struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs, + unsigned int opf, gfp_t gfp_mask, struct bio_set *bs) { gfp_t saved_gfp = gfp_mask; struct bio *bio; void *p; - /* should not use nobvec bioset for nr_iovecs > 0 */ - if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) && nr_iovecs > 0)) + /* should not use nobvec bioset for nr_vecs > 0 */ + if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) && nr_vecs > 0)) return NULL; /* @@ -492,26 +495,29 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned short nr_iovecs, return NULL; bio = p + bs->front_pad; - if (nr_iovecs > BIO_INLINE_VECS) { + if (nr_vecs > BIO_INLINE_VECS) { struct bio_vec *bvl = NULL; - bvl = bvec_alloc(&bs->bvec_pool, &nr_iovecs, gfp_mask); + bvl = bvec_alloc(&bs->bvec_pool, &nr_vecs, gfp_mask); if (!bvl && gfp_mask != saved_gfp) { punt_bios_to_rescuer(bs); gfp_mask = saved_gfp; - bvl = bvec_alloc(&bs->bvec_pool, &nr_iovecs, gfp_mask); + bvl = bvec_alloc(&bs->bvec_pool, &nr_vecs, gfp_mask); } if (unlikely(!bvl)) goto err_free; - bio_init(bio, bvl, nr_iovecs); - } else if (nr_iovecs) { + bio_init(bio, bvl, nr_vecs); + } else if (nr_vecs) { bio_init(bio, bio->bi_inline_vecs, BIO_INLINE_VECS); } else { bio_init(bio, NULL, 0); } bio->bi_pool = bs; + if (bdev) + bio_set_dev(bio, bdev); + bio->bi_opf = opf; return bio; err_free: @@ -767,7 +773,7 @@ struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs) { struct bio *b; - b = bio_alloc_bioset(gfp_mask, 0, bs); + b = bio_alloc_bioset(NULL, 0, 0, gfp_mask, bs); if (!b) return NULL; @@ -1743,7 +1749,7 @@ struct bio *bio_alloc_kiocb(struct kiocb *kiocb, unsigned short nr_vecs, struct bio *bio; if (!(kiocb->ki_flags & IOCB_ALLOC_CACHE) || nr_vecs > BIO_INLINE_VECS) - return bio_alloc_bioset(GFP_KERNEL, nr_vecs, bs); + return bio_alloc_bioset(NULL, nr_vecs, 0, GFP_KERNEL, bs); cache = per_cpu_ptr(bs->cache, get_cpu()); if 
(cache->free_list) { @@ -1757,7 +1763,7 @@ struct bio *bio_alloc_kiocb(struct kiocb *kiocb, unsigned short nr_vecs, return bio; } put_cpu(); - bio = bio_alloc_bioset(GFP_KERNEL, nr_vecs, bs); + bio = bio_alloc_bioset(NULL, nr_vecs, 0, GFP_KERNEL, bs); bio_set_flag(bio, BIO_PERCPU_CACHE); return bio; } diff --git a/block/bounce.c b/block/bounce.c index 7af1a72835b99..330ddde25b460 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -165,12 +165,10 @@ static struct bio *bounce_clone_bio(struct bio *bio_src) * asking for trouble and would force extra work on * __bio_clone_fast() anyways. */ - bio = bio_alloc_bioset(GFP_NOIO, bio_segments(bio_src), - &bounce_bio_set); - bio->bi_bdev = bio_src->bi_bdev; + bio = bio_alloc_bioset(bio_src->bi_bdev, bio_segments(bio_src), + bio_src->bi_opf, GFP_NOIO, &bounce_bio_set); if (bio_flagged(bio_src, BIO_REMAPPED)) bio_set_flag(bio, BIO_REMAPPED); - bio->bi_opf = bio_src->bi_opf; bio->bi_ioprio = bio_src->bi_ioprio; bio->bi_write_hint = bio_src->bi_write_hint; bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector; diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 72cf7603d51fc..f5bcded3640da 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -138,15 +138,14 @@ static int _drbd_md_sync_page_io(struct drbd_device *device, op_flags |= REQ_FUA | REQ_PREFLUSH; op_flags |= REQ_SYNC; - bio = bio_alloc_bioset(GFP_NOIO, 1, &drbd_md_io_bio_set); - bio_set_dev(bio, bdev->md_bdev); + bio = bio_alloc_bioset(bdev->md_bdev, 1, op | op_flags, GFP_NOIO, + &drbd_md_io_bio_set); bio->bi_iter.bi_sector = sector; err = -EIO; if (bio_add_page(bio, device->md_io.page, size, 0) != size) goto out; bio->bi_private = device; bio->bi_end_io = drbd_md_endio; - bio_set_op_attrs(bio, op, op_flags); if (op != REQ_OP_WRITE && device->state.disk == D_DISKLESS && device->ldev == NULL) /* special case, drbd_md_read() during drbd_adm_attach(): no get_ldev */ diff --git 
a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index c1f816f896a89..df25eecf80af0 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -976,12 +976,13 @@ static void drbd_bm_endio(struct bio *bio) static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_hold(local) { - struct bio *bio = bio_alloc_bioset(GFP_NOIO, 1, &drbd_md_io_bio_set); struct drbd_device *device = ctx->device; + unsigned int op = (ctx->flags & BM_AIO_READ) ? REQ_OP_READ : REQ_OP_WRITE; + struct bio *bio = bio_alloc_bioset(device->ldev->md_bdev, 1, op, + GFP_NOIO, &drbd_md_io_bio_set); struct drbd_bitmap *b = device->bitmap; struct page *page; unsigned int len; - unsigned int op = (ctx->flags & BM_AIO_READ) ? REQ_OP_READ : REQ_OP_WRITE; sector_t on_disk_sector = device->ldev->md.md_offset + device->ldev->md.bm_offset; @@ -1006,14 +1007,12 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_ho bm_store_page_idx(page, page_nr); } else page = b->bm_pages[page_nr]; - bio_set_dev(bio, device->ldev->md_bdev); bio->bi_iter.bi_sector = on_disk_sector; /* bio_add_page of a single page to an empty bio will always succeed, * according to api. Do we want to assert that? */ bio_add_page(bio, page, len, 0); bio->bi_private = ctx; bio->bi_end_io = drbd_bm_endio; - bio_set_op_attrs(bio, op, 0); if (drbd_insert_fault(device, (op == REQ_OP_WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) { bio_io_error(bio); diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index d15aae6c51c13..c4b7e434de8ac 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -913,14 +913,13 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, /* btree_search_recurse()'s btree iterator is no good anymore */ ret = miss == bio ? 
MAP_DONE : -EINTR; - cache_bio = bio_alloc_bioset(GFP_NOWAIT, + cache_bio = bio_alloc_bioset(miss->bi_bdev, DIV_ROUND_UP(s->insert_bio_sectors, PAGE_SECTORS), - &dc->disk.bio_split); + 0, GFP_NOWAIT, &dc->disk.bio_split); if (!cache_bio) goto out_submit; cache_bio->bi_iter.bi_sector = miss->bi_iter.bi_sector; - bio_copy_dev(cache_bio, miss); cache_bio->bi_iter.bi_size = s->insert_bio_sectors << 9; cache_bio->bi_end_io = backing_request_endio; @@ -1025,16 +1024,15 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s) */ struct bio *flush; - flush = bio_alloc_bioset(GFP_NOIO, 0, - &dc->disk.bio_split); + flush = bio_alloc_bioset(bio->bi_bdev, 0, + REQ_OP_WRITE | REQ_PREFLUSH, + GFP_NOIO, &dc->disk.bio_split); if (!flush) { s->iop.status = BLK_STS_RESOURCE; goto insert_data; } - bio_copy_dev(flush, bio); flush->bi_end_io = backing_request_endio; flush->bi_private = cl; - flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; /* I/O request sent to backing device */ closure_bio_submit(s->iop.c, flush, cl); } diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 3c5ecd35d3483..f7e4435b7439a 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1672,11 +1672,10 @@ retry: if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM)) mutex_lock(&cc->bio_alloc_lock); - clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, &cc->bs); + clone = bio_alloc_bioset(cc->dev->bdev, nr_iovecs, io->base_bio->bi_opf, + GFP_NOIO, &cc->bs); clone->bi_private = io; clone->bi_end_io = crypt_endio; - bio_set_dev(clone, cc->dev->bdev); - clone->bi_opf = io->base_bio->bi_opf; remaining_size = size; diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 2d3cda0acacb6..23e038f8dc845 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -345,11 +345,10 @@ static void do_region(int op, int op_flags, unsigned region, (PAGE_SIZE >> SECTOR_SHIFT))); } - bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, &io->client->bios); + bio = bio_alloc_bioset(where->bdev, num_bvecs, op | op_flags, 
+ GFP_NOIO, &io->client->bios); bio->bi_iter.bi_sector = where->sector + (where->count - remaining); - bio_set_dev(bio, where->bdev); bio->bi_end_io = endio; - bio_set_op_attrs(bio, op, op_flags); store_io_and_region_in_bio(bio, io, region); if (op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES) { diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 4f31591d2d25e..5630b470ba429 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -1821,11 +1821,11 @@ static void __writecache_writeback_pmem(struct dm_writecache *wc, struct writeba max_pages = e->wc_list_contiguous; - bio = bio_alloc_bioset(GFP_NOIO, max_pages, &wc->bio_set); + bio = bio_alloc_bioset(wc->dev->bdev, max_pages, REQ_OP_WRITE, + GFP_NOIO, &wc->bio_set); wb = container_of(bio, struct writeback_struct, bio); wb->wc = wc; bio->bi_end_io = writecache_writeback_endio; - bio_set_dev(bio, wc->dev->bdev); bio->bi_iter.bi_sector = read_original_sector(wc, e); if (max_pages <= WB_LIST_INLINE || unlikely(!(wb->wc_list = kmalloc_array(max_pages, sizeof(struct wc_entry *), @@ -1852,7 +1852,8 @@ static void __writecache_writeback_pmem(struct dm_writecache *wc, struct writeba wb->wc_list[wb->wc_list_n++] = f; e = f; } - bio_set_op_attrs(bio, REQ_OP_WRITE, WC_MODE_FUA(wc) * REQ_FUA); + if (WC_MODE_FUA(wc)) + bio->bi_opf |= REQ_FUA; if (writecache_has_error(wc)) { bio->bi_status = BLK_STS_IOERR; bio_endio(bio); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index e431f72c10bf4..069e29013b6be 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -519,7 +519,7 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) struct dm_target_io *tio; struct bio *clone; - clone = bio_alloc_bioset(GFP_NOIO, 0, &md->io_bs); + clone = bio_alloc_bioset(NULL, 0, 0, GFP_NOIO, &md->io_bs); tio = container_of(clone, struct dm_target_io, clone); tio->inside_dm_io = true; @@ -552,7 +552,8 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci, struct dm_target *t /* the 
dm_target_io embedded in ci->io is available */ tio = &ci->io->tio; } else { - struct bio *clone = bio_alloc_bioset(gfp_mask, 0, &ci->io->md->bs); + struct bio *clone = bio_alloc_bioset(NULL, 0, 0, gfp_mask, + &ci->io->md->bs); if (!clone) return NULL; diff --git a/drivers/md/md.c b/drivers/md/md.c index 5881d05a76ebc..40fc1f7e65c5d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -562,11 +562,11 @@ static void submit_flushes(struct work_struct *ws) atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending); rcu_read_unlock(); - bi = bio_alloc_bioset(GFP_NOIO, 0, &mddev->bio_set); + bi = bio_alloc_bioset(rdev->bdev, 0, + REQ_OP_WRITE | REQ_PREFLUSH, + GFP_NOIO, &mddev->bio_set); bi->bi_end_io = md_end_flush; bi->bi_private = rdev; - bio_set_dev(bi, rdev->bdev); - bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; atomic_inc(&mddev->flush_pending); submit_bio(bi); rcu_read_lock(); @@ -955,7 +955,6 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev, * If an error occurred, call md_error */ struct bio *bio; - int ff = 0; if (!page) return; @@ -963,11 +962,13 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev, if (test_bit(Faulty, &rdev->flags)) return; - bio = bio_alloc_bioset(GFP_NOIO, 1, &mddev->sync_set); + bio = bio_alloc_bioset(rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev, + 1, + REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA, + GFP_NOIO, &mddev->sync_set); atomic_inc(&rdev->nr_pending); - bio_set_dev(bio, rdev->meta_bdev ? 
rdev->meta_bdev : rdev->bdev); bio->bi_iter.bi_sector = sector; bio_add_page(bio, page, size, 0); bio->bi_private = rdev; @@ -976,8 +977,7 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev, if (test_bit(MD_FAILFAST_SUPPORTED, &mddev->flags) && test_bit(FailFast, &rdev->flags) && !test_bit(LastDev, &rdev->flags)) - ff = MD_FAILFAST; - bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA | ff; + bio->bi_opf |= MD_FAILFAST; atomic_inc(&mddev->pending_writes); submit_bio(bio); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index e2d8acb1e9881..43276f8fdc815 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1126,7 +1126,8 @@ static void alloc_behind_master_bio(struct r1bio *r1_bio, int i = 0; struct bio *behind_bio = NULL; - behind_bio = bio_alloc_bioset(GFP_NOIO, vcnt, &r1_bio->mddev->bio_set); + behind_bio = bio_alloc_bioset(NULL, vcnt, 0, GFP_NOIO, + &r1_bio->mddev->bio_set); if (!behind_bio) return; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 2b969f70a31fb..cb7c58050708e 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -4892,14 +4892,12 @@ read_more: return sectors_done; } - read_bio = bio_alloc_bioset(GFP_KERNEL, RESYNC_PAGES, &mddev->bio_set); - - bio_set_dev(read_bio, rdev->bdev); + read_bio = bio_alloc_bioset(rdev->bdev, RESYNC_PAGES, REQ_OP_READ, + GFP_KERNEL, &mddev->bio_set); read_bio->bi_iter.bi_sector = (r10_bio->devs[r10_bio->read_slot].addr + rdev->data_offset); read_bio->bi_private = r10_bio; read_bio->bi_end_io = end_reshape_read; - bio_set_op_attrs(read_bio, REQ_OP_READ, 0); r10_bio->master_bio = read_bio; r10_bio->read_slot = r10_bio->devs[r10_bio->read_slot].devnum; diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 0b5dcaabbc155..66313adf99875 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -735,10 +735,9 @@ static void r5l_submit_current_io(struct r5l_log *log) static struct bio *r5l_bio_alloc(struct r5l_log *log) { - struct bio *bio 
= bio_alloc_bioset(GFP_NOIO, BIO_MAX_VECS, &log->bs); + struct bio *bio = bio_alloc_bioset(log->rdev->bdev, BIO_MAX_VECS, + REQ_OP_WRITE, GFP_NOIO, &log->bs); - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - bio_set_dev(bio, log->rdev->bdev); bio->bi_iter.bi_sector = log->rdev->data_offset + log->log_start; return bio; @@ -1634,7 +1633,8 @@ static int r5l_recovery_allocate_ra_pool(struct r5l_log *log, { struct page *page; - ctx->ra_bio = bio_alloc_bioset(GFP_KERNEL, BIO_MAX_VECS, &log->bs); + ctx->ra_bio = bio_alloc_bioset(NULL, BIO_MAX_VECS, 0, GFP_KERNEL, + &log->bs); if (!ctx->ra_bio) return -ENOMEM; diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c index 4ab417915d7f1..054d3bb252d48 100644 --- a/drivers/md/raid5-ppl.c +++ b/drivers/md/raid5-ppl.c @@ -496,11 +496,10 @@ static void ppl_submit_iounit(struct ppl_io_unit *io) if (!bio_add_page(bio, sh->ppl_page, PAGE_SIZE, 0)) { struct bio *prev = bio; - bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_VECS, + bio = bio_alloc_bioset(prev->bi_bdev, BIO_MAX_VECS, + prev->bi_opf, GFP_NOIO, &ppl_conf->bs); - bio->bi_opf = prev->bi_opf; bio->bi_write_hint = prev->bi_write_hint; - bio_copy_dev(bio, prev); bio->bi_iter.bi_sector = bio_end_sector(prev); bio_add_page(bio, sh->ppl_page, PAGE_SIZE, 0); @@ -637,10 +636,10 @@ static void ppl_do_flush(struct ppl_io_unit *io) struct bio *bio; char b[BDEVNAME_SIZE]; - bio = bio_alloc_bioset(GFP_NOIO, 0, &ppl_conf->flush_bs); - bio_set_dev(bio, bdev); + bio = bio_alloc_bioset(bdev, 0, GFP_NOIO, + REQ_OP_WRITE | REQ_PREFLUSH, + &ppl_conf->flush_bs); bio->bi_private = io; - bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; bio->bi_end_io = ppl_flush_endio; pr_debug("%s: dev: %s\n", __func__, diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 6045678365a59..3c92ba3748192 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -352,18 +352,16 @@ static struct bio *iblock_get_bio(struct se_cmd *cmd, sector_t lba, u32 
sg_num, * Only allocate as many vector entries as the bio code allows us to, * we'll loop later on until we have handled the whole request. */ - bio = bio_alloc_bioset(GFP_NOIO, bio_max_segs(sg_num), - &ib_dev->ibd_bio_set); + bio = bio_alloc_bioset(ib_dev->ibd_bd, bio_max_segs(sg_num), opf, + GFP_NOIO, &ib_dev->ibd_bio_set); if (!bio) { pr_err("Unable to allocate memory for bio\n"); return NULL; } - bio_set_dev(bio, ib_dev->ibd_bd); bio->bi_private = cmd; bio->bi_end_io = &iblock_bio_done; bio->bi_iter.bi_sector = lba; - bio->bi_opf = opf; return bio; } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 409bad3928db3..421d921a05716 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3143,7 +3143,7 @@ struct bio *btrfs_bio_alloc(unsigned int nr_iovecs) struct bio *bio; ASSERT(0 < nr_iovecs && nr_iovecs <= BIO_MAX_VECS); - bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, &btrfs_bioset); + bio = bio_alloc_bioset(NULL, nr_iovecs, 0, GFP_NOFS, &btrfs_bioset); btrfs_bio_init(btrfs_bio(bio)); return bio; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8c417864c66ae..e71dde8de0db0 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -394,7 +394,7 @@ static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages) struct f2fs_sb_info *sbi = fio->sbi; struct bio *bio; - bio = bio_alloc_bioset(GFP_NOIO, npages, &f2fs_bioset); + bio = bio_alloc_bioset(NULL, npages, 0, GFP_NOIO, &f2fs_bioset); f2fs_target_device(sbi, fio->new_blkaddr, bio); if (is_read_io(fio->op)) { @@ -985,8 +985,8 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, struct bio_post_read_ctx *ctx = NULL; unsigned int post_read_steps = 0; - bio = bio_alloc_bioset(for_write ? GFP_NOIO : GFP_KERNEL, - bio_max_segs(nr_pages), &f2fs_bioset); + bio = bio_alloc_bioset(NULL, bio_max_segs(nr_pages), REQ_OP_READ, + for_write ? 
GFP_NOIO : GFP_KERNEL, &f2fs_bioset); if (!bio) return ERR_PTR(-ENOMEM); @@ -994,7 +994,6 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, f2fs_target_device(sbi, blkaddr, bio); bio->bi_end_io = f2fs_read_end_io; - bio_set_op_attrs(bio, REQ_OP_READ, op_flag); if (fscrypt_inode_uses_fs_layer_crypto(inode)) post_read_steps |= STEP_DECRYPT; diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index c938bbad075e1..340d373cb1bf9 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1196,10 +1196,10 @@ iomap_alloc_ioend(struct inode *inode, struct iomap_writepage_ctx *wpc, struct iomap_ioend *ioend; struct bio *bio; - bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_VECS, &iomap_ioend_bioset); - bio_set_dev(bio, wpc->iomap.bdev); + bio = bio_alloc_bioset(wpc->iomap.bdev, BIO_MAX_VECS, + REQ_OP_WRITE | wbc_to_write_flags(wbc), + GFP_NOFS, &iomap_ioend_bioset); bio->bi_iter.bi_sector = sector; - bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc); bio->bi_write_hint = inode->i_write_hint; wbc_init_bio(wbc, bio); diff --git a/include/linux/bio.h b/include/linux/bio.h index edeae54074ede..2f63ae9a71e1a 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -405,8 +405,9 @@ extern void bioset_exit(struct bio_set *); extern int biovec_init_pool(mempool_t *pool, int pool_entries); extern int bioset_init_from_src(struct bio_set *bs, struct bio_set *src); -struct bio *bio_alloc_bioset(gfp_t gfp, unsigned short nr_iovecs, - struct bio_set *bs); +struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs, + unsigned int opf, gfp_t gfp_mask, + struct bio_set *bs); struct bio *bio_alloc_kiocb(struct kiocb *kiocb, unsigned short nr_vecs, struct bio_set *bs); struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs); @@ -419,7 +420,7 @@ extern struct bio_set fs_bio_set; static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned short nr_iovecs) { - return bio_alloc_bioset(gfp_mask, nr_iovecs, 
&fs_bio_set); + return bio_alloc_bioset(NULL, nr_iovecs, 0, gfp_mask, &fs_bio_set); } void submit_bio(struct bio *bio); -- GitLab From b77c88c2100ce6a5ec8126c13599b5a7f6663e32 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:11:04 +0100 Subject: [PATCH 0196/1586] block: pass a block_device and opf to bio_alloc_kiocb Pass the block_device and operation that we plan to use this bio for to bio_alloc_kiocb to optimize the assignment. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220124091107.642561-17-hch@lst.de Signed-off-by: Jens Axboe --- block/bio.c | 12 ++++++++---- block/fops.c | 17 ++++++++--------- include/linux/bio.h | 4 ++-- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/block/bio.c b/block/bio.c index 9afc0c2aca6e4..6c3efb0fd12b1 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1731,7 +1731,9 @@ EXPORT_SYMBOL(bioset_init_from_src); /** * bio_alloc_kiocb - Allocate a bio from bio_set based on kiocb * @kiocb: kiocb describing the IO + * @bdev: block device to allocate the bio for (can be %NULL) * @nr_vecs: number of iovecs to pre-allocate + * @opf: operation and flags for bio * @bs: bio_set to allocate from * * Description: @@ -1742,14 +1744,14 @@ EXPORT_SYMBOL(bioset_init_from_src); * MUST be done from process context, not hard/soft IRQ.
* */ -struct bio *bio_alloc_kiocb(struct kiocb *kiocb, unsigned short nr_vecs, - struct bio_set *bs) +struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev, + unsigned short nr_vecs, unsigned int opf, struct bio_set *bs) { struct bio_alloc_cache *cache; struct bio *bio; if (!(kiocb->ki_flags & IOCB_ALLOC_CACHE) || nr_vecs > BIO_INLINE_VECS) - return bio_alloc_bioset(NULL, nr_vecs, 0, GFP_KERNEL, bs); + return bio_alloc_bioset(bdev, nr_vecs, opf, GFP_KERNEL, bs); cache = per_cpu_ptr(bs->cache, get_cpu()); if (cache->free_list) { @@ -1758,12 +1760,14 @@ struct bio *bio_alloc_kiocb(struct kiocb *kiocb, unsigned short nr_vecs, cache->nr--; put_cpu(); bio_init(bio, nr_vecs ? bio->bi_inline_vecs : NULL, nr_vecs); + bio_set_dev(bio, bdev); + bio->bi_opf = opf; bio->bi_pool = bs; bio_set_flag(bio, BIO_PERCPU_CACHE); return bio; } put_cpu(); - bio = bio_alloc_bioset(NULL, nr_vecs, 0, GFP_KERNEL, bs); + bio = bio_alloc_bioset(bdev, nr_vecs, opf, GFP_KERNEL, bs); bio_set_flag(bio, BIO_PERCPU_CACHE); return bio; } diff --git a/block/fops.c b/block/fops.c index 26bf15c770d21..3a62b8b912750 100644 --- a/block/fops.c +++ b/block/fops.c @@ -190,6 +190,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, struct blkdev_dio *dio; struct bio *bio; bool is_read = (iov_iter_rw(iter) == READ), is_sync; + unsigned int opf = is_read ? 
REQ_OP_READ : dio_bio_write_op(iocb); loff_t pos = iocb->ki_pos; int ret = 0; @@ -197,7 +198,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, (bdev_logical_block_size(bdev) - 1)) return -EINVAL; - bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool); + bio = bio_alloc_kiocb(iocb, bdev, nr_pages, opf, &blkdev_dio_pool); dio = container_of(bio, struct blkdev_dio, bio); atomic_set(&dio->ref, 1); @@ -223,7 +224,6 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, blk_start_plug(&plug); for (;;) { - bio_set_dev(bio, bdev); bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT; bio->bi_write_hint = iocb->ki_hint; bio->bi_private = dio; @@ -238,11 +238,9 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, } if (is_read) { - bio->bi_opf = REQ_OP_READ; if (dio->flags & DIO_SHOULD_DIRTY) bio_set_pages_dirty(bio); } else { - bio->bi_opf = dio_bio_write_op(iocb); task_io_account_write(bio->bi_iter.bi_size); } if (iocb->ki_flags & IOCB_NOWAIT) @@ -259,6 +257,8 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, atomic_inc(&dio->ref); submit_bio(bio); bio = bio_alloc(GFP_KERNEL, nr_pages); + bio_set_dev(bio, bdev); + bio->bi_opf = opf; } blk_finish_plug(&plug); @@ -311,6 +311,8 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb, unsigned int nr_pages) { struct block_device *bdev = iocb->ki_filp->private_data; + bool is_read = iov_iter_rw(iter) == READ; + unsigned int opf = is_read ? 
REQ_OP_READ : dio_bio_write_op(iocb); struct blkdev_dio *dio; struct bio *bio; loff_t pos = iocb->ki_pos; @@ -320,11 +322,10 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb, (bdev_logical_block_size(bdev) - 1)) return -EINVAL; - bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool); + bio = bio_alloc_kiocb(iocb, bdev, nr_pages, opf, &blkdev_dio_pool); dio = container_of(bio, struct blkdev_dio, bio); dio->flags = 0; dio->iocb = iocb; - bio_set_dev(bio, bdev); bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT; bio->bi_write_hint = iocb->ki_hint; bio->bi_end_io = blkdev_bio_end_io_async; @@ -347,14 +348,12 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb, } dio->size = bio->bi_iter.bi_size; - if (iov_iter_rw(iter) == READ) { - bio->bi_opf = REQ_OP_READ; + if (is_read) { if (iter_is_iovec(iter)) { dio->flags |= DIO_SHOULD_DIRTY; bio_set_pages_dirty(bio); } } else { - bio->bi_opf = dio_bio_write_op(iocb); task_io_account_write(bio->bi_iter.bi_size); } diff --git a/include/linux/bio.h b/include/linux/bio.h index 2f63ae9a71e1a..5c5ada2ebb270 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -408,8 +408,8 @@ extern int bioset_init_from_src(struct bio_set *bs, struct bio_set *src); struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs, unsigned int opf, gfp_t gfp_mask, struct bio_set *bs); -struct bio *bio_alloc_kiocb(struct kiocb *kiocb, unsigned short nr_vecs, - struct bio_set *bs); +struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev, + unsigned short nr_vecs, unsigned int opf, struct bio_set *bs); struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs); extern void bio_put(struct bio *); -- GitLab From 07888c665b405b1cd3577ddebfeb74f4717a84c4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:11:05 +0100 Subject: [PATCH 0197/1586] block: pass a block_device and opf to bio_alloc Pass the block_device and operation that we plan to use this bio for to 
bio_alloc to optimize the assignment. NULL/0 can be passed, both for the passthrough case on a raw request_queue and to temporarily avoid refactoring some nasty code. Also move the gfp_mask argument after the nr_vecs argument for a much more logical calling convention matching what most of the kernel does. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220124091107.642561-18-hch@lst.de Signed-off-by: Jens Axboe --- block/bio.c | 5 +---- block/fops.c | 4 +--- drivers/block/drbd/drbd_receiver.c | 10 ++++------ drivers/block/rnbd/rnbd-srv.c | 5 ++--- drivers/block/xen-blkback/blkback.c | 11 +++++------ drivers/block/zram/zram_drv.c | 11 ++++------- drivers/md/dm-log-writes.c | 21 ++++++++------------- drivers/md/dm-thin.c | 9 ++++----- drivers/md/dm-zoned-metadata.c | 15 ++++++--------- drivers/nvdimm/nd_virtio.c | 6 +++--- drivers/nvme/target/io-cmd-bdev.c | 12 ++++++------ drivers/nvme/target/passthru.c | 5 +++-- drivers/nvme/target/zns.c | 6 +++--- drivers/scsi/ufs/ufshpb.c | 4 ++-- drivers/target/target_core_iblock.c | 5 ++--- fs/btrfs/disk-io.c | 6 +++--- fs/buffer.c | 14 ++++++-------- fs/crypto/bio.c | 13 +++++++------ fs/direct-io.c | 5 +---- fs/erofs/zdata.c | 5 ++--- fs/ext4/page-io.c | 3 +-- fs/ext4/readpage.c | 8 ++++---- fs/gfs2/lops.c | 8 +++----- fs/gfs2/meta_io.c | 4 +--- fs/gfs2/ops_fstype.c | 4 +--- fs/hfsplus/wrapper.c | 4 +--- fs/iomap/buffered-io.c | 16 ++++++++-------- fs/iomap/direct-io.c | 8 ++------ fs/jfs/jfs_logmgr.c | 11 ++--------- fs/jfs/jfs_metapage.c | 9 +++------ fs/mpage.c | 7 +++---- fs/nfs/blocklayout/blocklayout.c | 4 +--- fs/nilfs2/segbuf.c | 4 ++-- fs/ntfs3/fsntfs.c | 8 ++------ fs/ocfs2/cluster/heartbeat.c | 4 +--- fs/squashfs/block.c | 11 ++++++----- fs/xfs/xfs_bio_io.c | 10 ++++------ fs/xfs/xfs_buf.c | 4 +--- fs/zonefs/super.c | 5 ++--- include/linux/bio.h | 5 +++-- kernel/power/swap.c | 5 ++--- mm/page_io.c | 10 ++++------ 42 files changed, 130 insertions(+), 194 
deletions(-) diff --git a/block/bio.c b/block/bio.c index 6c3efb0fd12b1..b73c9babd5835 100644 --- a/block/bio.c +++ b/block/bio.c @@ -347,10 +347,7 @@ EXPORT_SYMBOL(bio_chain); struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev, unsigned int nr_pages, unsigned int opf, gfp_t gfp) { - struct bio *new = bio_alloc(gfp, nr_pages); - - bio_set_dev(new, bdev); - new->bi_opf = opf; + struct bio *new = bio_alloc(bdev, nr_pages, opf, gfp); if (bio) { bio_chain(bio, new); diff --git a/block/fops.c b/block/fops.c index 3a62b8b912750..c683596847731 100644 --- a/block/fops.c +++ b/block/fops.c @@ -256,9 +256,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, } atomic_inc(&dio->ref); submit_bio(bio); - bio = bio_alloc(GFP_KERNEL, nr_pages); - bio_set_dev(bio, bdev); - bio->bi_opf = opf; + bio = bio_alloc(bdev, nr_pages, opf, GFP_KERNEL); } blk_finish_plug(&plug); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index fb59b263deeef..04e3ec12d8b49 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1279,7 +1279,8 @@ static void one_flush_endio(struct bio *bio) static void submit_one_flush(struct drbd_device *device, struct issue_flush_context *ctx) { - struct bio *bio = bio_alloc(GFP_NOIO, 0); + struct bio *bio = bio_alloc(device->ldev->backing_bdev, 0, + REQ_OP_FLUSH | REQ_PREFLUSH, GFP_NOIO); struct one_flush_context *octx = kmalloc(sizeof(*octx), GFP_NOIO); if (!octx) { @@ -1297,10 +1298,8 @@ static void submit_one_flush(struct drbd_device *device, struct issue_flush_cont octx->device = device; octx->ctx = ctx; - bio_set_dev(bio, device->ldev->backing_bdev); bio->bi_private = octx; bio->bi_end_io = one_flush_endio; - bio->bi_opf = REQ_OP_FLUSH | REQ_PREFLUSH; device->flush_jif = jiffies; set_bit(FLUSH_PENDING, &device->flags); @@ -1685,11 +1684,10 @@ int drbd_submit_peer_request(struct drbd_device *device, * generated bio, but a bio allocated on behalf of 
the peer. */ next_bio: - bio = bio_alloc(GFP_NOIO, nr_pages); + bio = bio_alloc(device->ldev->backing_bdev, nr_pages, op | op_flags, + GFP_NOIO); /* > peer_req->i.sector, unless this is the first bio */ bio->bi_iter.bi_sector = sector; - bio_set_dev(bio, device->ldev->backing_bdev); - bio_set_op_attrs(bio, op, op_flags); bio->bi_private = peer_req; bio->bi_end_io = drbd_peer_request_endio; diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c index ff9b389976078..132e950685d59 100644 --- a/drivers/block/rnbd/rnbd-srv.c +++ b/drivers/block/rnbd/rnbd-srv.c @@ -149,7 +149,8 @@ static int process_rdma(struct rnbd_srv_session *srv_sess, priv->sess_dev = sess_dev; priv->id = id; - bio = bio_alloc(GFP_KERNEL, 1); + bio = bio_alloc(sess_dev->rnbd_dev->bdev, 1, + rnbd_to_bio_flags(le32_to_cpu(msg->rw)), GFP_KERNEL); if (bio_add_page(bio, virt_to_page(data), datalen, offset_in_page(data)) != datalen) { rnbd_srv_err(sess_dev, "Failed to map data to bio\n"); @@ -159,13 +160,11 @@ static int process_rdma(struct rnbd_srv_session *srv_sess, bio->bi_end_io = rnbd_dev_bi_end_io; bio->bi_private = priv; - bio->bi_opf = rnbd_to_bio_flags(le32_to_cpu(msg->rw)); bio->bi_iter.bi_sector = le64_to_cpu(msg->sector); bio->bi_iter.bi_size = le32_to_cpu(msg->bi_size); prio = srv_sess->ver < RNBD_PROTO_VER_MAJOR || usrlen < sizeof(*msg) ? 
0 : le16_to_cpu(msg->prio); bio_set_prio(bio, prio); - bio_set_dev(bio, sess_dev->rnbd_dev->bdev); submit_bio(bio); diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 6bb2ad7692065..d1e26461a64ed 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -1326,13 +1326,13 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, pages[i]->page, seg[i].nsec << 9, seg[i].offset) == 0)) { - bio = bio_alloc(GFP_KERNEL, bio_max_segs(nseg - i)); + bio = bio_alloc(preq.bdev, bio_max_segs(nseg - i), + operation | operation_flags, + GFP_KERNEL); biolist[nbio++] = bio; - bio_set_dev(bio, preq.bdev); bio->bi_private = pending_req; bio->bi_end_io = end_block_io_op; bio->bi_iter.bi_sector = preq.sector_number; - bio_set_op_attrs(bio, operation, operation_flags); } preq.sector_number += seg[i].nsec; @@ -1342,12 +1342,11 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, if (!bio) { BUG_ON(operation_flags != REQ_PREFLUSH); - bio = bio_alloc(GFP_KERNEL, 0); + bio = bio_alloc(preq.bdev, 0, operation | operation_flags, + GFP_KERNEL); biolist[nbio++] = bio; - bio_set_dev(bio, preq.bdev); bio->bi_private = pending_req; bio->bi_end_io = end_block_io_op; - bio_set_op_attrs(bio, operation, operation_flags); } atomic_set(&pending_req->pendcnt, nbio); diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 342dbcb3f2208..f3fe0ea8aa80f 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -616,24 +616,21 @@ static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec, { struct bio *bio; - bio = bio_alloc(GFP_NOIO, 1); + bio = bio_alloc(zram->bdev, 1, parent ? 
parent->bi_opf : REQ_OP_READ, + GFP_NOIO); if (!bio) return -ENOMEM; bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9); - bio_set_dev(bio, zram->bdev); if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) { bio_put(bio); return -EIO; } - if (!parent) { - bio->bi_opf = REQ_OP_READ; + if (!parent) bio->bi_end_io = zram_page_end_io; - } else { - bio->bi_opf = parent->bi_opf; + else bio_chain(bio, parent); - } submit_bio(bio); return 1; diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index 25f5e8d2d417b..c9d036d6bb2ee 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -217,14 +217,12 @@ static int write_metadata(struct log_writes_c *lc, void *entry, void *ptr; size_t ret; - bio = bio_alloc(GFP_KERNEL, 1); + bio = bio_alloc(lc->logdev->bdev, 1, REQ_OP_WRITE, GFP_KERNEL); bio->bi_iter.bi_size = 0; bio->bi_iter.bi_sector = sector; - bio_set_dev(bio, lc->logdev->bdev); bio->bi_end_io = (sector == WRITE_LOG_SUPER_SECTOR) ? log_end_super : log_end_io; bio->bi_private = lc; - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); page = alloc_page(GFP_KERNEL); if (!page) { @@ -271,13 +269,12 @@ static int write_inline_data(struct log_writes_c *lc, void *entry, atomic_inc(&lc->io_blocks); - bio = bio_alloc(GFP_KERNEL, bio_pages); + bio = bio_alloc(lc->logdev->bdev, bio_pages, REQ_OP_WRITE, + GFP_KERNEL); bio->bi_iter.bi_size = 0; bio->bi_iter.bi_sector = sector; - bio_set_dev(bio, lc->logdev->bdev); bio->bi_end_io = log_end_io; bio->bi_private = lc; - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); for (i = 0; i < bio_pages; i++) { pg_datalen = min_t(int, datalen, PAGE_SIZE); @@ -353,13 +350,12 @@ static int log_one_block(struct log_writes_c *lc, goto out; atomic_inc(&lc->io_blocks); - bio = bio_alloc(GFP_KERNEL, bio_max_segs(block->vec_cnt)); + bio = bio_alloc(lc->logdev->bdev, bio_max_segs(block->vec_cnt), + REQ_OP_WRITE, GFP_KERNEL); bio->bi_iter.bi_size = 0; bio->bi_iter.bi_sector = sector; - bio_set_dev(bio, lc->logdev->bdev); 
bio->bi_end_io = log_end_io; bio->bi_private = lc; - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); for (i = 0; i < block->vec_cnt; i++) { /* @@ -371,14 +367,13 @@ static int log_one_block(struct log_writes_c *lc, if (ret != block->vecs[i].bv_len) { atomic_inc(&lc->io_blocks); submit_bio(bio); - bio = bio_alloc(GFP_KERNEL, - bio_max_segs(block->vec_cnt - i)); + bio = bio_alloc(lc->logdev->bdev, + bio_max_segs(block->vec_cnt - i), + REQ_OP_WRITE, GFP_KERNEL); bio->bi_iter.bi_size = 0; bio->bi_iter.bi_sector = sector; - bio_set_dev(bio, lc->logdev->bdev); bio->bi_end_io = log_end_io; bio->bi_private = lc; - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); ret = bio_add_page(bio, block->vecs[i].bv_page, block->vecs[i].bv_len, 0); diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 411a3f56ed90c..f4234d615aa1b 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1177,13 +1177,12 @@ static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m) return; } - discard_parent = bio_alloc(GFP_NOIO, 1); + discard_parent = bio_alloc(NULL, 1, 0, GFP_NOIO); discard_parent->bi_end_io = passdown_endio; discard_parent->bi_private = m; - - if (m->maybe_shared) - passdown_double_checking_shared_status(m, discard_parent); - else { + if (m->maybe_shared) + passdown_double_checking_shared_status(m, discard_parent); + else { struct discard_op op; begin_discard(&op, tc, discard_parent); diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 5718b83cc7182..e5f1eb27ce2e9 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -550,7 +550,8 @@ static struct dmz_mblock *dmz_get_mblock_slow(struct dmz_metadata *zmd, if (!mblk) return ERR_PTR(-ENOMEM); - bio = bio_alloc(GFP_NOIO, 1); + bio = bio_alloc(dev->bdev, 1, REQ_OP_READ | REQ_META | REQ_PRIO, + GFP_NOIO); spin_lock(&zmd->mblk_lock); @@ -574,10 +575,8 @@ static struct dmz_mblock *dmz_get_mblock_slow(struct dmz_metadata *zmd, /* Submit read BIO */ 
bio->bi_iter.bi_sector = dmz_blk2sect(block); - bio_set_dev(bio, dev->bdev); bio->bi_private = mblk; bio->bi_end_io = dmz_mblock_bio_end_io; - bio_set_op_attrs(bio, REQ_OP_READ, REQ_META | REQ_PRIO); bio_add_page(bio, mblk->page, DMZ_BLOCK_SIZE, 0); submit_bio(bio); @@ -721,15 +720,14 @@ static int dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk, if (dmz_bdev_is_dying(dev)) return -EIO; - bio = bio_alloc(GFP_NOIO, 1); + bio = bio_alloc(dev->bdev, 1, REQ_OP_WRITE | REQ_META | REQ_PRIO, + GFP_NOIO); set_bit(DMZ_META_WRITING, &mblk->state); bio->bi_iter.bi_sector = dmz_blk2sect(block); - bio_set_dev(bio, dev->bdev); bio->bi_private = mblk; bio->bi_end_io = dmz_mblock_bio_end_io; - bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_META | REQ_PRIO); bio_add_page(bio, mblk->page, DMZ_BLOCK_SIZE, 0); submit_bio(bio); @@ -751,10 +749,9 @@ static int dmz_rdwr_block(struct dmz_dev *dev, int op, if (dmz_bdev_is_dying(dev)) return -EIO; - bio = bio_alloc(GFP_NOIO, 1); + bio = bio_alloc(dev->bdev, 1, op | REQ_SYNC | REQ_META | REQ_PRIO, + GFP_NOIO); bio->bi_iter.bi_sector = dmz_blk2sect(block); - bio_set_dev(bio, dev->bdev); - bio_set_op_attrs(bio, op, REQ_SYNC | REQ_META | REQ_PRIO); bio_add_page(bio, page, DMZ_BLOCK_SIZE, 0); ret = submit_bio_wait(bio); bio_put(bio); diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c index 10351d5b49fac..c6a648fd8744a 100644 --- a/drivers/nvdimm/nd_virtio.c +++ b/drivers/nvdimm/nd_virtio.c @@ -105,12 +105,12 @@ int async_pmem_flush(struct nd_region *nd_region, struct bio *bio) * parent bio. Otherwise directly call nd_region flush. 
*/ if (bio && bio->bi_iter.bi_sector != -1) { - struct bio *child = bio_alloc(GFP_ATOMIC, 0); + struct bio *child = bio_alloc(bio->bi_bdev, 0, REQ_PREFLUSH, + GFP_ATOMIC); if (!child) return -ENOMEM; - bio_copy_dev(child, bio); - child->bi_opf = REQ_PREFLUSH; + bio_clone_blkg_association(child, bio); child->bi_iter.bi_sector = -1; bio_chain(child, bio); submit_bio(child); diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 70ca9dfc1771a..e092af3abc710 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -268,14 +268,15 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) if (nvmet_use_inline_bvec(req)) { bio = &req->b.inline_bio; bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); + bio_set_dev(bio, req->ns->bdev); + bio->bi_opf = op; } else { - bio = bio_alloc(GFP_KERNEL, bio_max_segs(sg_cnt)); + bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt), op, + GFP_KERNEL); } - bio_set_dev(bio, req->ns->bdev); bio->bi_iter.bi_sector = sector; bio->bi_private = req; bio->bi_end_io = nvmet_bio_done; - bio->bi_opf = op; blk_start_plug(&plug); if (req->metadata_len) @@ -296,10 +297,9 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) } } - bio = bio_alloc(GFP_KERNEL, bio_max_segs(sg_cnt)); - bio_set_dev(bio, req->ns->bdev); + bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt), + op, GFP_KERNEL); bio->bi_iter.bi_sector = sector; - bio->bi_opf = op; bio_chain(bio, prev); submit_bio(prev); diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index 9e5b89ae29dfe..38f72968c3fde 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -207,11 +207,12 @@ static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq) if (nvmet_use_inline_bvec(req)) { bio = &req->p.inline_bio; bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); + bio->bi_opf = req_op(rq); } else { - bio = bio_alloc(GFP_KERNEL, 
bio_max_segs(req->sg_cnt)); + bio = bio_alloc(NULL, bio_max_segs(req->sg_cnt), req_op(rq), + GFP_KERNEL); bio->bi_end_io = bio_put; } - bio->bi_opf = req_op(rq); for_each_sg(req->sg, sg, req->sg_cnt, i) { if (bio_add_pc_page(rq->q, bio, sg_page(sg), sg->length, diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c index 247de74247fab..62c53e8f26d35 100644 --- a/drivers/nvme/target/zns.c +++ b/drivers/nvme/target/zns.c @@ -522,6 +522,7 @@ static void nvmet_bdev_zone_append_bio_done(struct bio *bio) void nvmet_bdev_execute_zone_append(struct nvmet_req *req) { sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba); + const unsigned int op = REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE; u16 status = NVME_SC_SUCCESS; unsigned int total_len = 0; struct scatterlist *sg; @@ -552,13 +553,12 @@ void nvmet_bdev_execute_zone_append(struct nvmet_req *req) if (nvmet_use_inline_bvec(req)) { bio = &req->z.inline_bio; bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); + bio->bi_opf = op; } else { - bio = bio_alloc(GFP_KERNEL, req->sg_cnt); + bio = bio_alloc(req->ns->bdev, req->sg_cnt, op, GFP_KERNEL); } - bio->bi_opf = REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE; bio->bi_end_io = nvmet_bdev_zone_append_bio_done; - bio_set_dev(bio, req->ns->bdev); bio->bi_iter.bi_sector = sect; bio->bi_private = req; if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA)) diff --git a/drivers/scsi/ufs/ufshpb.c b/drivers/scsi/ufs/ufshpb.c index 2d36a0715fca6..8970068314ef2 100644 --- a/drivers/scsi/ufs/ufshpb.c +++ b/drivers/scsi/ufs/ufshpb.c @@ -494,7 +494,7 @@ static struct ufshpb_req *ufshpb_get_map_req(struct ufshpb_lu *hpb, if (!map_req) return NULL; - bio = bio_alloc(GFP_KERNEL, hpb->pages_per_srgn); + bio = bio_alloc(NULL, hpb->pages_per_srgn, 0, GFP_KERNEL); if (!bio) { ufshpb_put_req(hpb, map_req); return NULL; @@ -2050,7 +2050,7 @@ static int ufshpb_pre_req_mempool_init(struct ufshpb_lu *hpb) INIT_LIST_HEAD(&pre_req->list_req); pre_req->req = NULL; - pre_req->bio = 
bio_alloc(GFP_KERNEL, 1); + pre_req->bio = bio_alloc(NULL, 1, 0, GFP_KERNEL); if (!pre_req->bio) goto release_mem; diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 3c92ba3748192..87ede165ddba4 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -415,10 +415,9 @@ iblock_execute_sync_cache(struct se_cmd *cmd) if (immed) target_complete_cmd(cmd, SAM_STAT_GOOD); - bio = bio_alloc(GFP_KERNEL, 0); + bio = bio_alloc(ib_dev->ibd_bd, 0, REQ_OP_WRITE | REQ_PREFLUSH, + GFP_KERNEL); bio->bi_end_io = iblock_end_io_flush; - bio_set_dev(bio, ib_dev->ibd_bd); - bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; if (!immed) bio->bi_private = cmd; submit_bio(bio); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 87a5addbedf6d..f45aa506f9a6f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4029,8 +4029,9 @@ static int write_dev_supers(struct btrfs_device *device, * to do I/O, so we don't lose the ability to do integrity * checking. */ - bio = bio_alloc(GFP_NOFS, 1); - bio_set_dev(bio, device->bdev); + bio = bio_alloc(device->bdev, 1, + REQ_OP_WRITE | REQ_SYNC | REQ_META | REQ_PRIO, + GFP_NOFS); bio->bi_iter.bi_sector = bytenr >> SECTOR_SHIFT; bio->bi_private = device; bio->bi_end_io = btrfs_end_super_write; @@ -4042,7 +4043,6 @@ static int write_dev_supers(struct btrfs_device *device, * go down lazy and there's a short window where the on-disk * copies might still contain the older version. 
*/ - bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_META | REQ_PRIO; if (i == 0 && !btrfs_test_opt(device->fs_info, NOBARRIER)) bio->bi_opf |= REQ_FUA; diff --git a/fs/buffer.c b/fs/buffer.c index 8e112b6bd3719..a17c386a142c7 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3024,12 +3024,16 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, if (test_set_buffer_req(bh) && (op == REQ_OP_WRITE)) clear_buffer_write_io_error(bh); - bio = bio_alloc(GFP_NOIO, 1); + if (buffer_meta(bh)) + op_flags |= REQ_META; + if (buffer_prio(bh)) + op_flags |= REQ_PRIO; + + bio = bio_alloc(bh->b_bdev, 1, op | op_flags, GFP_NOIO); fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO); bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); - bio_set_dev(bio, bh->b_bdev); bio->bi_write_hint = write_hint; bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh)); @@ -3038,12 +3042,6 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, bio->bi_end_io = end_bio_bh_io_sync; bio->bi_private = bh; - if (buffer_meta(bh)) - op_flags |= REQ_META; - if (buffer_prio(bh)) - op_flags |= REQ_PRIO; - bio_set_op_attrs(bio, op, op_flags); - /* Take care of bh's that straddle the end of the device */ guard_bio_eod(bio); diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c index bfc2a5b74ed39..755e985a42e0b 100644 --- a/fs/crypto/bio.c +++ b/fs/crypto/bio.c @@ -54,7 +54,8 @@ static int fscrypt_zeroout_range_inline_crypt(const struct inode *inode, int num_pages = 0; /* This always succeeds since __GFP_DIRECT_RECLAIM is set. 
*/ - bio = bio_alloc(GFP_NOFS, BIO_MAX_VECS); + bio = bio_alloc(inode->i_sb->s_bdev, BIO_MAX_VECS, REQ_OP_WRITE, + GFP_NOFS); while (len) { unsigned int blocks_this_page = min(len, blocks_per_page); @@ -62,10 +63,8 @@ static int fscrypt_zeroout_range_inline_crypt(const struct inode *inode, if (num_pages == 0) { fscrypt_set_bio_crypt_ctx(bio, inode, lblk, GFP_NOFS); - bio_set_dev(bio, inode->i_sb->s_bdev); bio->bi_iter.bi_sector = pblk << (blockbits - SECTOR_SHIFT); - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); } ret = bio_add_page(bio, ZERO_PAGE(0), bytes_this_page, 0); if (WARN_ON(ret != bytes_this_page)) { @@ -82,6 +81,8 @@ static int fscrypt_zeroout_range_inline_crypt(const struct inode *inode, if (err) goto out; bio_reset(bio); + bio_set_dev(bio, inode->i_sb->s_bdev); + bio->bi_opf = REQ_OP_WRITE; num_pages = 0; } } @@ -150,12 +151,10 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, return -EINVAL; /* This always succeeds since __GFP_DIRECT_RECLAIM is set. */ - bio = bio_alloc(GFP_NOFS, nr_pages); + bio = bio_alloc(inode->i_sb->s_bdev, nr_pages, REQ_OP_WRITE, GFP_NOFS); do { - bio_set_dev(bio, inode->i_sb->s_bdev); bio->bi_iter.bi_sector = pblk << (blockbits - 9); - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); i = 0; offset = 0; @@ -183,6 +182,8 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, if (err) goto out; bio_reset(bio); + bio_set_dev(bio, inode->i_sb->s_bdev); + bio->bi_opf = REQ_OP_WRITE; } while (len != 0); err = 0; out: diff --git a/fs/direct-io.c b/fs/direct-io.c index 6544435580470..38bca4980a1ca 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -396,11 +396,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio, * bio_alloc() is guaranteed to return a bio when allowed to sleep and * we request a valid number of vectors. 
*/ - bio = bio_alloc(GFP_KERNEL, nr_vecs); - - bio_set_dev(bio, bdev); + bio = bio_alloc(bdev, nr_vecs, dio->op | dio->op_flags, GFP_KERNEL); bio->bi_iter.bi_sector = first_sector; - bio_set_op_attrs(bio, dio->op, dio->op_flags); if (dio->is_async) bio->bi_end_io = dio_bio_end_aio; else diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 498b7666efe85..db7de2dbac739 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -1371,15 +1371,14 @@ submit_bio_retry: } if (!bio) { - bio = bio_alloc(GFP_NOIO, BIO_MAX_VECS); + bio = bio_alloc(mdev.m_bdev, BIO_MAX_VECS, + REQ_OP_READ, GFP_NOIO); bio->bi_end_io = z_erofs_decompressqueue_endio; - bio_set_dev(bio, mdev.m_bdev); last_bdev = mdev.m_bdev; bio->bi_iter.bi_sector = (sector_t)cur << LOG_SECTORS_PER_BLOCK; bio->bi_private = bi_private; - bio->bi_opf = REQ_OP_READ; if (f->readahead) bio->bi_opf |= REQ_RAHEAD; ++nr_bios; diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 1d370364230e8..1253982268730 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -398,10 +398,9 @@ static void io_submit_init_bio(struct ext4_io_submit *io, * bio_alloc will _always_ be able to allocate a bio if * __GFP_DIRECT_RECLAIM is set, see comments for bio_alloc_bioset(). */ - bio = bio_alloc(GFP_NOIO, BIO_MAX_VECS); + bio = bio_alloc(bh->b_bdev, BIO_MAX_VECS, 0, GFP_NOIO); fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO); bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); - bio_set_dev(bio, bh->b_bdev); bio->bi_end_io = ext4_end_bio; bio->bi_private = ext4_get_io_end(io->io_end); io->io_bio = bio; diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index 4cd62f1d848c8..1aa26d6634fc9 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -365,15 +365,15 @@ int ext4_mpage_readpages(struct inode *inode, * bio_alloc will _always_ be able to allocate a bio if * __GFP_DIRECT_RECLAIM is set, see bio_alloc_bioset(). 
*/ - bio = bio_alloc(GFP_KERNEL, bio_max_segs(nr_pages)); + bio = bio_alloc(bdev, bio_max_segs(nr_pages), + REQ_OP_READ, GFP_KERNEL); fscrypt_set_bio_crypt_ctx(bio, inode, next_block, GFP_KERNEL); ext4_set_bio_post_read_ctx(bio, inode, page->index); - bio_set_dev(bio, bdev); bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9); bio->bi_end_io = mpage_end_io; - bio_set_op_attrs(bio, REQ_OP_READ, - rac ? REQ_RAHEAD : 0); + if (rac) + bio->bi_opf |= REQ_RAHEAD; } length = first_hole << blkbits; diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index ca0bb3a73912a..4ae1eefae616d 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -265,10 +265,9 @@ static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno, bio_end_io_t *end_io) { struct super_block *sb = sdp->sd_vfs; - struct bio *bio = bio_alloc(GFP_NOIO, BIO_MAX_VECS); + struct bio *bio = bio_alloc(sb->s_bdev, BIO_MAX_VECS, 0, GFP_NOIO); bio->bi_iter.bi_sector = blkno << sdp->sd_fsb2bb_shift; - bio_set_dev(bio, sb->s_bdev); bio->bi_end_io = end_io; bio->bi_private = sdp; @@ -489,10 +488,9 @@ static struct bio *gfs2_chain_bio(struct bio *prev, unsigned int nr_iovecs) { struct bio *new; - new = bio_alloc(GFP_NOIO, nr_iovecs); - bio_copy_dev(new, prev); + new = bio_alloc(prev->bi_bdev, nr_iovecs, prev->bi_opf, GFP_NOIO); + bio_clone_blkg_association(new, prev); new->bi_iter.bi_sector = bio_end_sector(prev); - new->bi_opf = prev->bi_opf; new->bi_write_hint = prev->bi_write_hint; bio_chain(new, prev); submit_bio(prev); diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 72d30a682ecec..a580b90b75222 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -222,9 +222,8 @@ static void gfs2_submit_bhs(int op, int op_flags, struct buffer_head *bhs[], struct buffer_head *bh = *bhs; struct bio *bio; - bio = bio_alloc(GFP_NOIO, num); + bio = bio_alloc(bh->b_bdev, num, op | op_flags, GFP_NOIO); bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); - bio_set_dev(bio, bh->b_bdev); while (num > 0) { bh = *bhs; if 
(!bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh))) { @@ -235,7 +234,6 @@ static void gfs2_submit_bhs(int op, int op_flags, struct buffer_head *bhs[], num--; } bio->bi_end_io = gfs2_meta_read_endio; - bio_set_op_attrs(bio, op, op_flags); submit_bio(bio); } } diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 7f8410d8fdc1d..c9b423c874a32 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -251,14 +251,12 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent) ClearPageDirty(page); lock_page(page); - bio = bio_alloc(GFP_NOFS, 1); + bio = bio_alloc(sb->s_bdev, 1, REQ_OP_READ | REQ_META, GFP_NOFS); bio->bi_iter.bi_sector = sector * (sb->s_blocksize >> 9); - bio_set_dev(bio, sb->s_bdev); bio_add_page(bio, page, PAGE_SIZE, 0); bio->bi_end_io = end_bio_io_page; bio->bi_private = page; - bio_set_op_attrs(bio, REQ_OP_READ, REQ_META); submit_bio(bio); wait_on_page_locked(page); bio_put(bio); diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index 4688cc7b36926..0b8ad6586df53 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c @@ -63,10 +63,8 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector, offset = start & (io_size - 1); sector &= ~((io_size >> HFSPLUS_SECTOR_SHIFT) - 1); - bio = bio_alloc(GFP_NOIO, 1); + bio = bio_alloc(sb->s_bdev, 1, op | op_flags, GFP_NOIO); bio->bi_iter.bi_sector = sector; - bio_set_dev(bio, sb->s_bdev); - bio_set_op_attrs(bio, op, op_flags); if (op != WRITE && data) *data = (u8 *)buf + offset; diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 340d373cb1bf9..70f3657a6ec06 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -290,19 +290,20 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter, if (ctx->rac) /* same as readahead_gfp_mask */ gfp |= __GFP_NORETRY | __GFP_NOWARN; - ctx->bio = bio_alloc(gfp, bio_max_segs(nr_vecs)); + ctx->bio = bio_alloc(iomap->bdev, bio_max_segs(nr_vecs), + REQ_OP_READ, gfp); /* * If the 
bio_alloc fails, try it again for a single page to * avoid having to deal with partial page reads. This emulates * what do_mpage_readpage does. */ - if (!ctx->bio) - ctx->bio = bio_alloc(orig_gfp, 1); - ctx->bio->bi_opf = REQ_OP_READ; + if (!ctx->bio) { + ctx->bio = bio_alloc(iomap->bdev, 1, REQ_OP_READ, + orig_gfp); + } if (ctx->rac) ctx->bio->bi_opf |= REQ_RAHEAD; ctx->bio->bi_iter.bi_sector = sector; - bio_set_dev(ctx->bio, iomap->bdev); ctx->bio->bi_end_io = iomap_read_end_io; bio_add_folio(ctx->bio, folio, plen, poff); } @@ -1226,10 +1227,9 @@ iomap_chain_bio(struct bio *prev) { struct bio *new; - new = bio_alloc(GFP_NOFS, BIO_MAX_VECS); - bio_copy_dev(new, prev);/* also copies over blkcg information */ + new = bio_alloc(prev->bi_bdev, BIO_MAX_VECS, prev->bi_opf, GFP_NOFS); + bio_clone_blkg_association(new, prev); new->bi_iter.bi_sector = bio_end_sector(prev); - new->bi_opf = prev->bi_opf; new->bi_write_hint = prev->bi_write_hint; bio_chain(prev, new); diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 03ea367df19a4..e2ba13645ef28 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -183,15 +183,13 @@ static void iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio, int flags = REQ_SYNC | REQ_IDLE; struct bio *bio; - bio = bio_alloc(GFP_KERNEL, 1); - bio_set_dev(bio, iter->iomap.bdev); + bio = bio_alloc(iter->iomap.bdev, 1, REQ_OP_WRITE | flags, GFP_KERNEL); bio->bi_iter.bi_sector = iomap_sector(&iter->iomap, pos); bio->bi_private = dio; bio->bi_end_io = iomap_dio_bio_end_io; get_page(page); __bio_add_page(bio, page, len, 0); - bio_set_op_attrs(bio, REQ_OP_WRITE, flags); iomap_dio_submit_bio(iter, dio, bio, pos); } @@ -309,14 +307,12 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter, goto out; } - bio = bio_alloc(GFP_KERNEL, nr_pages); - bio_set_dev(bio, iomap->bdev); + bio = bio_alloc(iomap->bdev, nr_pages, bio_opf, GFP_KERNEL); bio->bi_iter.bi_sector = iomap_sector(iomap, pos); bio->bi_write_hint = 
dio->iocb->ki_hint; bio->bi_ioprio = dio->iocb->ki_ioprio; bio->bi_private = dio; bio->bi_end_io = iomap_dio_bio_end_io; - bio->bi_opf = bio_opf; ret = bio_iov_iter_get_pages(bio, dio->submit.iter); if (unlikely(ret)) { diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 78fd136ac13b9..997c81fcea349 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -1980,17 +1980,13 @@ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp) bp->l_flag |= lbmREAD; - bio = bio_alloc(GFP_NOFS, 1); - + bio = bio_alloc(log->bdev, 1, REQ_OP_READ, GFP_NOFS); bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9); - bio_set_dev(bio, log->bdev); - bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset); BUG_ON(bio->bi_iter.bi_size != LOGPSIZE); bio->bi_end_io = lbmIODone; bio->bi_private = bp; - bio->bi_opf = REQ_OP_READ; /*check if journaling to disk has been disabled*/ if (log->no_integrity) { bio->bi_iter.bi_size = 0; @@ -2125,16 +2121,13 @@ static void lbmStartIO(struct lbuf * bp) jfs_info("lbmStartIO"); - bio = bio_alloc(GFP_NOFS, 1); + bio = bio_alloc(log->bdev, 1, REQ_OP_WRITE | REQ_SYNC, GFP_NOFS); bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9); - bio_set_dev(bio, log->bdev); - bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset); BUG_ON(bio->bi_iter.bi_size != LOGPSIZE); bio->bi_end_io = lbmIODone; bio->bi_private = bp; - bio->bi_opf = REQ_OP_WRITE | REQ_SYNC; /* check if journaling to disk has been disabled */ if (log->no_integrity) { diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 104ae698443ed..fde1a9cf902e8 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -417,12 +417,10 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc) } len = min(xlen, (int)JFS_SBI(inode->i_sb)->nbperpage); - bio = bio_alloc(GFP_NOFS, 1); - bio_set_dev(bio, inode->i_sb->s_bdev); + bio = bio_alloc(inode->i_sb->s_bdev, 1, REQ_OP_WRITE, GFP_NOFS); bio->bi_iter.bi_sector = pblock << 
(inode->i_blkbits - 9); bio->bi_end_io = metapage_write_end_io; bio->bi_private = page; - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); /* Don't call bio_add_page yet, we may add to this vec */ bio_offset = offset; @@ -497,13 +495,12 @@ static int metapage_readpage(struct file *fp, struct page *page) if (bio) submit_bio(bio); - bio = bio_alloc(GFP_NOFS, 1); - bio_set_dev(bio, inode->i_sb->s_bdev); + bio = bio_alloc(inode->i_sb->s_bdev, 1, REQ_OP_READ, + GFP_NOFS); bio->bi_iter.bi_sector = pblock << (inode->i_blkbits - 9); bio->bi_end_io = metapage_read_end_io; bio->bi_private = page; - bio_set_op_attrs(bio, REQ_OP_READ, 0); len = xlen << inode->i_blkbits; offset = block_offset << inode->i_blkbits; if (bio_add_page(bio, page, len, offset) < len) diff --git a/fs/mpage.c b/fs/mpage.c index 06e95d777e940..dbfc02e23d97f 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -273,10 +273,10 @@ alloc_new: page)) goto out; } - args->bio = bio_alloc(gfp, bio_max_segs(args->nr_pages)); + args->bio = bio_alloc(bdev, bio_max_segs(args->nr_pages), 0, + gfp); if (args->bio == NULL) goto confused; - bio_set_dev(args->bio, bdev); args->bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9); } @@ -586,8 +586,7 @@ alloc_new: page, wbc)) goto out; } - bio = bio_alloc(GFP_NOFS, BIO_MAX_VECS); - bio_set_dev(bio, bdev); + bio = bio_alloc(bdev, BIO_MAX_VECS, 0, GFP_NOFS); bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9); wbc_init_bio(wbc, bio); diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 38e063af7e98a..79a8b451791f5 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -154,12 +154,10 @@ do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect, retry: if (!bio) { - bio = bio_alloc(GFP_NOIO, bio_max_segs(npg)); + bio = bio_alloc(map->bdev, bio_max_segs(npg), rw, GFP_NOIO); bio->bi_iter.bi_sector = disk_addr >> SECTOR_SHIFT; - bio_set_dev(bio, map->bdev); bio->bi_end_io = end_io; bio->bi_private = par; - 
bio_set_op_attrs(bio, rw, 0); } if (bio_add_page(bio, page, *len, offset) < *len) { bio = bl_submit_bio(bio); diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 53b7c6d21cdd8..4f71faacd8253 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -391,8 +391,8 @@ static int nilfs_segbuf_submit_bh(struct nilfs_segment_buffer *segbuf, BUG_ON(wi->nr_vecs <= 0); repeat: if (!wi->bio) { - wi->bio = bio_alloc(GFP_NOIO, wi->nr_vecs); - bio_set_dev(wi->bio, wi->nilfs->ns_bdev); + wi->bio = bio_alloc(wi->nilfs->ns_bdev, wi->nr_vecs, 0, + GFP_NOIO); wi->bio->bi_iter.bi_sector = (wi->blocknr + wi->end) << (wi->nilfs->ns_blocksize_bits - 9); } diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index 4a255e21ecf5f..0660a07c5a96e 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -1485,15 +1485,13 @@ int ntfs_bio_pages(struct ntfs_sb_info *sbi, const struct runs_tree *run, lbo = ((u64)lcn << cluster_bits) + off; len = ((u64)clen << cluster_bits) - off; new_bio: - new = bio_alloc(GFP_NOFS, nr_pages - page_idx); + new = bio_alloc(bdev, nr_pages - page_idx, op, GFP_NOFS); if (bio) { bio_chain(bio, new); submit_bio(bio); } bio = new; - bio_set_dev(bio, bdev); bio->bi_iter.bi_sector = lbo >> 9; - bio->bi_opf = op; while (len) { off = vbo & (PAGE_SIZE - 1); @@ -1584,14 +1582,12 @@ int ntfs_bio_fill_1(struct ntfs_sb_info *sbi, const struct runs_tree *run) lbo = (u64)lcn << cluster_bits; len = (u64)clen << cluster_bits; new_bio: - new = bio_alloc(GFP_NOFS, BIO_MAX_VECS); + new = bio_alloc(bdev, BIO_MAX_VECS, REQ_OP_WRITE, GFP_NOFS); if (bio) { bio_chain(bio, new); submit_bio(bio); } bio = new; - bio_set_dev(bio, bdev); - bio->bi_opf = REQ_OP_WRITE; bio->bi_iter.bi_sector = lbo >> 9; for (;;) { diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index a17be1618bf70..ea0e70c0fce09 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -518,7 +518,7 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg, * GFP_KERNEL 
that the local node can get fenced. It would be * nicest if we could pre-allocate these bios and avoid this * all together. */ - bio = bio_alloc(GFP_ATOMIC, 16); + bio = bio_alloc(reg->hr_bdev, 16, op | op_flags, GFP_ATOMIC); if (!bio) { mlog(ML_ERROR, "Could not alloc slots BIO!\n"); bio = ERR_PTR(-ENOMEM); @@ -527,10 +527,8 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg, /* Must put everything in 512 byte sectors for the bio... */ bio->bi_iter.bi_sector = (reg->hr_start_block + cs) << (bits - 9); - bio_set_dev(bio, reg->hr_bdev); bio->bi_private = wc; bio->bi_end_io = o2hb_bio_end_io; - bio_set_op_attrs(bio, op, op_flags); vec_start = (cs << bits) % PAGE_SIZE; while(cs < max_slots) { diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 2db8bcf7ff859..622c844f6d118 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -86,16 +86,17 @@ static int squashfs_bio_read(struct super_block *sb, u64 index, int length, int error, i; struct bio *bio; - if (page_count <= BIO_MAX_VECS) - bio = bio_alloc(GFP_NOIO, page_count); - else + if (page_count <= BIO_MAX_VECS) { + bio = bio_alloc(sb->s_bdev, page_count, REQ_OP_READ, GFP_NOIO); + } else { bio = bio_kmalloc(GFP_NOIO, page_count); + bio_set_dev(bio, sb->s_bdev); + bio->bi_opf = REQ_OP_READ; + } if (!bio) return -ENOMEM; - bio_set_dev(bio, sb->s_bdev); - bio->bi_opf = READ; bio->bi_iter.bi_sector = block * (msblk->devblksize >> SECTOR_SHIFT); for (i = 0; i < page_count; ++i) { diff --git a/fs/xfs/xfs_bio_io.c b/fs/xfs/xfs_bio_io.c index 667e297f59b16..eff4a9f21dcff 100644 --- a/fs/xfs/xfs_bio_io.c +++ b/fs/xfs/xfs_bio_io.c @@ -61,10 +61,9 @@ xfs_rw_bdev( if (is_vmalloc && op == REQ_OP_WRITE) flush_kernel_vmap_range(data, count); - bio = bio_alloc(GFP_KERNEL, bio_max_vecs(left)); - bio_set_dev(bio, bdev); + bio = bio_alloc(bdev, bio_max_vecs(left), op | REQ_META | REQ_SYNC, + GFP_KERNEL); bio->bi_iter.bi_sector = sector; - bio->bi_opf = op | REQ_META | REQ_SYNC; do { struct page *page = 
kmem_to_page(data); @@ -74,10 +73,9 @@ xfs_rw_bdev( while (bio_add_page(bio, page, len, off) != len) { struct bio *prev = bio; - bio = bio_alloc(GFP_KERNEL, bio_max_vecs(left)); - bio_copy_dev(bio, prev); + bio = bio_alloc(prev->bi_bdev, bio_max_vecs(left), + prev->bi_opf, GFP_KERNEL); bio->bi_iter.bi_sector = bio_end_sector(prev); - bio->bi_opf = prev->bi_opf; bio_chain(prev, bio); submit_bio(prev); diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index b45e0d50a4052..ae87fd95b17e2 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1440,12 +1440,10 @@ next_chunk: atomic_inc(&bp->b_io_remaining); nr_pages = bio_max_segs(total_nr_pages); - bio = bio_alloc(GFP_NOIO, nr_pages); - bio_set_dev(bio, bp->b_target->bt_bdev); + bio = bio_alloc(bp->b_target->bt_bdev, nr_pages, op, GFP_NOIO); bio->bi_iter.bi_sector = sector; bio->bi_end_io = xfs_buf_bio_end_io; bio->bi_private = bp; - bio->bi_opf = op; for (; size && nr_pages; nr_pages--, page_index++) { int rbytes, nbytes = PAGE_SIZE - offset; diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index b76dfb310ab65..c0fc2c326dcee 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -692,12 +692,11 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) if (!nr_pages) return 0; - bio = bio_alloc(GFP_NOFS, nr_pages); - bio_set_dev(bio, bdev); + bio = bio_alloc(bdev, nr_pages, + REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE, GFP_NOFS); bio->bi_iter.bi_sector = zi->i_zsector; bio->bi_write_hint = iocb->ki_hint; bio->bi_ioprio = iocb->ki_ioprio; - bio->bi_opf = REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE; if (iocb->ki_flags & IOCB_DSYNC) bio->bi_opf |= REQ_FUA; diff --git a/include/linux/bio.h b/include/linux/bio.h index 5c5ada2ebb270..be6ac92913d48 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -418,9 +418,10 @@ extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *); extern struct bio_set fs_bio_set; -static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned short 
nr_iovecs) +static inline struct bio *bio_alloc(struct block_device *bdev, + unsigned short nr_vecs, unsigned int opf, gfp_t gfp_mask) { - return bio_alloc_bioset(NULL, nr_iovecs, 0, gfp_mask, &fs_bio_set); + return bio_alloc_bioset(bdev, nr_vecs, opf, gfp_mask, &fs_bio_set); } void submit_bio(struct bio *bio); diff --git a/kernel/power/swap.c b/kernel/power/swap.c index f1bd031295752..6c4f983cbacc5 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -276,10 +276,9 @@ static int hib_submit_io(int op, int op_flags, pgoff_t page_off, void *addr, struct bio *bio; int error = 0; - bio = bio_alloc(GFP_NOIO | __GFP_HIGH, 1); + bio = bio_alloc(hib_resume_bdev, 1, op | op_flags, + GFP_NOIO | __GFP_HIGH); bio->bi_iter.bi_sector = page_off * (PAGE_SIZE >> 9); - bio_set_dev(bio, hib_resume_bdev); - bio_set_op_attrs(bio, op, op_flags); if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { pr_err("Adding page to bio failed at %llu\n", diff --git a/mm/page_io.c b/mm/page_io.c index 0bf8e40f4e573..61c792f916fac 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -338,10 +338,10 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, return 0; } - bio = bio_alloc(GFP_NOIO, 1); - bio_set_dev(bio, sis->bdev); + bio = bio_alloc(sis->bdev, 1, + REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc), + GFP_NOIO); bio->bi_iter.bi_sector = swap_page_sector(page); - bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc); bio->bi_end_io = end_write_func; bio_add_page(bio, page, thp_size(page), 0); @@ -403,9 +403,7 @@ int swap_readpage(struct page *page, bool synchronous) } ret = 0; - bio = bio_alloc(GFP_KERNEL, 1); - bio_set_dev(bio, sis->bdev); - bio->bi_opf = REQ_OP_READ; + bio = bio_alloc(sis->bdev, 1, REQ_OP_READ, GFP_KERNEL); bio->bi_iter.bi_sector = swap_page_sector(page); bio->bi_end_io = end_swap_bio_read; bio_add_page(bio, page, thp_size(page), 0); -- GitLab From 49add4966d79244013fce35f95c6833fae82b8b1 Mon Sep 17 00:00:00 2001 From: Christoph 
Hellwig Date: Mon, 24 Jan 2022 10:11:06 +0100 Subject: [PATCH 0198/1586] block: pass a block_device and opf to bio_init Pass the block_device that we plan to use this bio for and the operation to bio_init to optimize the assignment. A NULL block_device can be passed, both for the passthrough case on a raw request_queue and to temporarily avoid refactoring some nasty code. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220124091107.642561-19-hch@lst.de Signed-off-by: Jens Axboe --- block/bio.c | 27 +++++++++++++-------------- block/blk-flush.c | 4 +--- block/blk-zoned.c | 5 +---- block/fops.c | 18 +++++++++--------- drivers/block/floppy.c | 4 +--- drivers/block/zram/zram_drv.c | 5 ++--- drivers/md/bcache/io.c | 3 ++- drivers/md/bcache/journal.c | 4 +--- drivers/md/bcache/movinggc.c | 4 ++-- drivers/md/bcache/request.c | 2 +- drivers/md/bcache/super.c | 8 +++----- drivers/md/bcache/writeback.c | 4 ++-- drivers/md/dm.c | 5 ++--- drivers/md/md-multipath.c | 2 +- drivers/md/md.c | 8 +++----- drivers/md/raid5-cache.c | 2 +- drivers/md/raid5-ppl.c | 2 +- drivers/md/raid5.c | 4 ++-- drivers/nvme/target/io-cmd-bdev.c | 10 ++++------ drivers/nvme/target/passthru.c | 4 ++-- drivers/nvme/target/zns.c | 4 ++-- fs/iomap/buffered-io.c | 4 +--- fs/xfs/xfs_bio_io.c | 4 +--- fs/xfs/xfs_log.c | 14 +++++++------- fs/zonefs/super.c | 4 +--- include/linux/bio.h | 4 ++-- 26 files changed, 68 insertions(+), 91 deletions(-) diff --git a/block/bio.c b/block/bio.c index b73c9babd5835..b2133d86e885e 100644 --- a/block/bio.c +++ b/block/bio.c @@ -249,12 +249,12 @@ static void bio_free(struct bio *bio) * they must remember to pair any call to bio_init() with bio_uninit() * when IO has completed, or when the bio is released. 
*/ -void bio_init(struct bio *bio, struct bio_vec *table, - unsigned short max_vecs) +void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table, + unsigned short max_vecs, unsigned int opf) { bio->bi_next = NULL; - bio->bi_bdev = NULL; - bio->bi_opf = 0; + bio->bi_bdev = bdev; + bio->bi_opf = opf; bio->bi_flags = 0; bio->bi_ioprio = 0; bio->bi_write_hint = 0; @@ -268,6 +268,8 @@ void bio_init(struct bio *bio, struct bio_vec *table, #ifdef CONFIG_BLK_CGROUP bio->bi_blkg = NULL; bio->bi_issue.value = 0; + if (bdev) + bio_associate_blkg(bio); #ifdef CONFIG_BLK_CGROUP_IOCOST bio->bi_iocost_cost = 0; #endif @@ -504,17 +506,14 @@ struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs, if (unlikely(!bvl)) goto err_free; - bio_init(bio, bvl, nr_vecs); + bio_init(bio, bdev, bvl, nr_vecs, opf); } else if (nr_vecs) { - bio_init(bio, bio->bi_inline_vecs, BIO_INLINE_VECS); + bio_init(bio, bdev, bio->bi_inline_vecs, BIO_INLINE_VECS, opf); } else { - bio_init(bio, NULL, 0); + bio_init(bio, bdev, NULL, 0, opf); } bio->bi_pool = bs; - if (bdev) - bio_set_dev(bio, bdev); - bio->bi_opf = opf; return bio; err_free: @@ -542,7 +541,8 @@ struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs) bio = kmalloc(struct_size(bio, bi_inline_vecs, nr_iovecs), gfp_mask); if (unlikely(!bio)) return NULL; - bio_init(bio, nr_iovecs ? bio->bi_inline_vecs : NULL, nr_iovecs); + bio_init(bio, NULL, nr_iovecs ? bio->bi_inline_vecs : NULL, nr_iovecs, + 0); bio->bi_pool = NULL; return bio; } @@ -1756,9 +1756,8 @@ struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev, cache->free_list = bio->bi_next; cache->nr--; put_cpu(); - bio_init(bio, nr_vecs ? bio->bi_inline_vecs : NULL, nr_vecs); - bio_set_dev(bio, bdev); - bio->bi_opf = opf; + bio_init(bio, bdev, nr_vecs ? 
bio->bi_inline_vecs : NULL, + nr_vecs, opf); bio->bi_pool = bs; bio_set_flag(bio, BIO_PERCPU_CACHE); return bio; diff --git a/block/blk-flush.c b/block/blk-flush.c index e4df894189ced..c689687248706 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -460,9 +460,7 @@ int blkdev_issue_flush(struct block_device *bdev) { struct bio bio; - bio_init(&bio, NULL, 0); - bio_set_dev(&bio, bdev); - bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; + bio_init(&bio, bdev, NULL, 0, REQ_OP_WRITE | REQ_PREFLUSH); return submit_bio_wait(&bio); } EXPORT_SYMBOL(blkdev_issue_flush); diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 5ab755d792c81..602bef54c8134 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -238,10 +238,7 @@ static int blkdev_zone_reset_all(struct block_device *bdev, gfp_t gfp_mask) { struct bio bio; - bio_init(&bio, NULL, 0); - bio_set_dev(&bio, bdev); - bio.bi_opf = REQ_OP_ZONE_RESET_ALL | REQ_SYNC; - + bio_init(&bio, bdev, NULL, 0, REQ_OP_ZONE_RESET_ALL | REQ_SYNC); return submit_bio_wait(&bio); } diff --git a/block/fops.c b/block/fops.c index c683596847731..3696665e586a8 100644 --- a/block/fops.c +++ b/block/fops.c @@ -75,8 +75,13 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, return -ENOMEM; } - bio_init(&bio, vecs, nr_pages); - bio_set_dev(&bio, bdev); + if (iov_iter_rw(iter) == READ) { + bio_init(&bio, bdev, vecs, nr_pages, REQ_OP_READ); + if (iter_is_iovec(iter)) + should_dirty = true; + } else { + bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb)); + } bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT; bio.bi_write_hint = iocb->ki_hint; bio.bi_private = current; @@ -88,14 +93,9 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, goto out; ret = bio.bi_iter.bi_size; - if (iov_iter_rw(iter) == READ) { - bio.bi_opf = REQ_OP_READ; - if (iter_is_iovec(iter)) - should_dirty = true; - } else { - bio.bi_opf = dio_bio_write_op(iocb); + if (iov_iter_rw(iter) == WRITE) task_io_account_write(ret); - } + if (iocb->ki_flags 
& IOCB_NOWAIT) bio.bi_opf |= REQ_NOWAIT; if (iocb->ki_flags & IOCB_HIPRI) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index e611411a934ce..19c2d0327e157 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4129,15 +4129,13 @@ static int __floppy_read_block_0(struct block_device *bdev, int drive) cbdata.drive = drive; - bio_init(&bio, &bio_vec, 1); - bio_set_dev(&bio, bdev); + bio_init(&bio, bdev, &bio_vec, 1, REQ_OP_READ); bio_add_page(&bio, page, block_size(bdev), 0); bio.bi_iter.bi_sector = 0; bio.bi_flags |= (1 << BIO_QUIET); bio.bi_private = &cbdata; bio.bi_end_io = floppy_rb0_cb; - bio_set_op_attrs(&bio, REQ_OP_READ, 0); init_completion(&cbdata.complete); diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index f3fe0ea8aa80f..a3a5e1e713268 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -743,10 +743,9 @@ static ssize_t writeback_store(struct device *dev, continue; } - bio_init(&bio, &bio_vec, 1); - bio_set_dev(&bio, zram->bdev); + bio_init(&bio, zram->bdev, &bio_vec, 1, + REQ_OP_WRITE | REQ_SYNC); bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9); - bio.bi_opf = REQ_OP_WRITE | REQ_SYNC; bio_add_page(&bio, bvec.bv_page, bvec.bv_len, bvec.bv_offset); diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index 9c6f9ec55b724..020712c5203fd 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -26,7 +26,8 @@ struct bio *bch_bbio_alloc(struct cache_set *c) struct bbio *b = mempool_alloc(&c->bio_meta, GFP_NOIO); struct bio *bio = &b->bio; - bio_init(bio, bio->bi_inline_vecs, meta_bucket_pages(&c->cache->sb)); + bio_init(bio, NULL, bio->bi_inline_vecs, + meta_bucket_pages(&c->cache->sb), 0); return bio; } diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 61bd79babf7ae..6d26c5b06e2b6 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -611,11 +611,9 @@ static void do_journal_discard(struct cache *ca) 
atomic_set(&ja->discard_in_flight, DISCARD_IN_FLIGHT); - bio_init(bio, bio->bi_inline_vecs, 1); - bio_set_op_attrs(bio, REQ_OP_DISCARD, 0); + bio_init(bio, ca->bdev, bio->bi_inline_vecs, 1, REQ_OP_DISCARD); bio->bi_iter.bi_sector = bucket_to_sector(ca->set, ca->sb.d[ja->discard_idx]); - bio_set_dev(bio, ca->bdev); bio->bi_iter.bi_size = bucket_bytes(ca); bio->bi_end_io = journal_discard_endio; diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index b9c3d27ec093a..99499d1f6e666 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -79,8 +79,8 @@ static void moving_init(struct moving_io *io) { struct bio *bio = &io->bio.bio; - bio_init(bio, bio->bi_inline_vecs, - DIV_ROUND_UP(KEY_SIZE(&io->w->key), PAGE_SECTORS)); + bio_init(bio, NULL, bio->bi_inline_vecs, + DIV_ROUND_UP(KEY_SIZE(&io->w->key), PAGE_SECTORS), 0); bio_get(bio); bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)); diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index c4b7e434de8ac..d4b98ebffd948 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -685,7 +685,7 @@ static void do_bio_hook(struct search *s, { struct bio *bio = &s->bio.bio; - bio_init(bio, NULL, 0); + bio_init(bio, NULL, NULL, 0, 0); __bio_clone_fast(bio, orig_bio); /* * bi_end_io can be set separately somewhere else, e.g. 
the diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index c31a62b963f00..bf3de149d3c9f 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -342,8 +342,7 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent) down(&dc->sb_write_mutex); closure_init(cl, parent); - bio_init(bio, dc->sb_bv, 1); - bio_set_dev(bio, dc->bdev); + bio_init(bio, dc->bdev, dc->sb_bv, 1, 0); bio->bi_end_io = write_bdev_super_endio; bio->bi_private = dc; @@ -386,8 +385,7 @@ void bcache_write_super(struct cache_set *c) if (ca->sb.version < version) ca->sb.version = version; - bio_init(bio, ca->sb_bv, 1); - bio_set_dev(bio, ca->bdev); + bio_init(bio, ca->bdev, ca->sb_bv, 1, 0); bio->bi_end_io = write_super_endio; bio->bi_private = ca; @@ -2239,7 +2237,7 @@ static int cache_alloc(struct cache *ca) __module_get(THIS_MODULE); kobject_init(&ca->kobj, &bch_cache_ktype); - bio_init(&ca->journal.bio, ca->journal.bio.bi_inline_vecs, 8); + bio_init(&ca->journal.bio, NULL, ca->journal.bio.bi_inline_vecs, 8, 0); /* * when ca->sb.njournal_buckets is not zero, journal exists, diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index c7560f66dca88..d42301e6309d4 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -292,8 +292,8 @@ static void dirty_init(struct keybuf_key *w) struct dirty_io *io = w->private; struct bio *bio = &io->bio; - bio_init(bio, bio->bi_inline_vecs, - DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS)); + bio_init(bio, NULL, bio->bi_inline_vecs, + DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS), 0); if (!io->dc->writeback_percent) bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 069e29013b6be..fa596b654c99c 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1303,9 +1303,8 @@ static int __send_empty_flush(struct clone_info *ci) * need to reference it after submit. It's just used as * the basis for the clone(s). 
*/ - bio_init(&flush_bio, NULL, 0); - flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC; - bio_set_dev(&flush_bio, ci->io->md->disk->part0); + bio_init(&flush_bio, ci->io->md->disk->part0, NULL, 0, + REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC); ci->bio = &flush_bio; ci->sector_count = 0; diff --git a/drivers/md/md-multipath.c b/drivers/md/md-multipath.c index e7d6486f090ff..5e15940634d85 100644 --- a/drivers/md/md-multipath.c +++ b/drivers/md/md-multipath.c @@ -121,7 +121,7 @@ static bool multipath_make_request(struct mddev *mddev, struct bio * bio) } multipath = conf->multipaths + mp_bh->path; - bio_init(&mp_bh->bio, NULL, 0); + bio_init(&mp_bh->bio, NULL, NULL, 0, 0); __bio_clone_fast(&mp_bh->bio, bio); mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset; diff --git a/drivers/md/md.c b/drivers/md/md.c index 40fc1f7e65c5d..0a89f072dae0d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -998,13 +998,11 @@ int sync_page_io(struct md_rdev *rdev, sector_t sector, int size, struct bio bio; struct bio_vec bvec; - bio_init(&bio, &bvec, 1); - if (metadata_op && rdev->meta_bdev) - bio_set_dev(&bio, rdev->meta_bdev); + bio_init(&bio, rdev->meta_bdev, &bvec, 1, op | op_flags); else - bio_set_dev(&bio, rdev->bdev); - bio.bi_opf = op | op_flags; + bio_init(&bio, rdev->bdev, &bvec, 1, op | op_flags); + if (metadata_op) bio.bi_iter.bi_sector = sector + rdev->sb_start; else if (rdev->mddev->reshape_position != MaxSector && diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 66313adf99875..98b9ca11c28d8 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -3108,7 +3108,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) INIT_LIST_HEAD(&log->io_end_ios); INIT_LIST_HEAD(&log->flushing_ios); INIT_LIST_HEAD(&log->finished_ios); - bio_init(&log->flush_bio, NULL, 0); + bio_init(&log->flush_bio, NULL, NULL, 0, 0); log->io_kc = KMEM_CACHE(r5l_io_unit, 0); if (!log->io_kc) diff --git a/drivers/md/raid5-ppl.c 
b/drivers/md/raid5-ppl.c index 054d3bb252d48..3446797fa0aca 100644 --- a/drivers/md/raid5-ppl.c +++ b/drivers/md/raid5-ppl.c @@ -250,7 +250,7 @@ static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log, INIT_LIST_HEAD(&io->stripe_list); atomic_set(&io->pending_stripes, 0); atomic_set(&io->pending_flushes, 0); - bio_init(&io->bio, io->biovec, PPL_IO_INLINE_BVECS); + bio_init(&io->bio, NULL, io->biovec, PPL_IO_INLINE_BVECS, 0); pplhdr = page_address(io->header_page); clear_page(pplhdr); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index ffe720c73b0a5..a9dcc5bc9c329 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2310,8 +2310,8 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp, for (i = 0; i < disks; i++) { struct r5dev *dev = &sh->dev[i]; - bio_init(&dev->req, &dev->vec, 1); - bio_init(&dev->rreq, &dev->rvec, 1); + bio_init(&dev->req, NULL, &dev->vec, 1, 0); + bio_init(&dev->rreq, NULL, &dev->rvec, 1, 0); } if (raid5_has_ppl(conf)) { diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index e092af3abc710..95c2bbb0b2f5f 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -267,9 +267,8 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) if (nvmet_use_inline_bvec(req)) { bio = &req->b.inline_bio; - bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); - bio_set_dev(bio, req->ns->bdev); - bio->bi_opf = op; + bio_init(bio, req->ns->bdev, req->inline_bvec, + ARRAY_SIZE(req->inline_bvec), op); } else { bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt), op, GFP_KERNEL); @@ -328,11 +327,10 @@ static void nvmet_bdev_execute_flush(struct nvmet_req *req) if (!nvmet_check_transfer_len(req, 0)) return; - bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); - bio_set_dev(bio, req->ns->bdev); + bio_init(bio, req->ns->bdev, req->inline_bvec, + ARRAY_SIZE(req->inline_bvec), REQ_OP_WRITE | REQ_PREFLUSH); bio->bi_private = req; 
bio->bi_end_io = nvmet_bio_done; - bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; submit_bio(bio); } diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index 38f72968c3fde..a810bf569fff8 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -206,8 +206,8 @@ static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq) if (nvmet_use_inline_bvec(req)) { bio = &req->p.inline_bio; - bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); - bio->bi_opf = req_op(rq); + bio_init(bio, NULL, req->inline_bvec, + ARRAY_SIZE(req->inline_bvec), req_op(rq)); } else { bio = bio_alloc(NULL, bio_max_segs(req->sg_cnt), req_op(rq), GFP_KERNEL); diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c index 62c53e8f26d35..3e421217a7ade 100644 --- a/drivers/nvme/target/zns.c +++ b/drivers/nvme/target/zns.c @@ -552,8 +552,8 @@ void nvmet_bdev_execute_zone_append(struct nvmet_req *req) if (nvmet_use_inline_bvec(req)) { bio = &req->z.inline_bio; - bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); - bio->bi_opf = op; + bio_init(bio, req->ns->bdev, req->inline_bvec, + ARRAY_SIZE(req->inline_bvec), op); } else { bio = bio_alloc(req->ns->bdev, req->sg_cnt, op, GFP_KERNEL); } diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 70f3657a6ec06..491534e908615 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -549,10 +549,8 @@ static int iomap_read_folio_sync(loff_t block_start, struct folio *folio, struct bio_vec bvec; struct bio bio; - bio_init(&bio, &bvec, 1); - bio.bi_opf = REQ_OP_READ; + bio_init(&bio, iomap->bdev, &bvec, 1, REQ_OP_READ); bio.bi_iter.bi_sector = iomap_sector(iomap, block_start); - bio_set_dev(&bio, iomap->bdev); bio_add_folio(&bio, folio, plen, poff); return submit_bio_wait(&bio); } diff --git a/fs/xfs/xfs_bio_io.c b/fs/xfs/xfs_bio_io.c index eff4a9f21dcff..32fa02945f739 100644 --- a/fs/xfs/xfs_bio_io.c +++ b/fs/xfs/xfs_bio_io.c @@ -36,9 +36,7 @@ 
xfs_flush_bdev_async( return; } - bio_init(bio, NULL, 0); - bio_set_dev(bio, bdev); - bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC; + bio_init(bio, bdev, NULL, 0, REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC); bio->bi_private = done; bio->bi_end_io = xfs_flush_bdev_async_endio; diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 89fec9a18c349..16f9edbda4eb3 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1883,19 +1883,19 @@ xlog_write_iclog( return; } - bio_init(&iclog->ic_bio, iclog->ic_bvec, howmany(count, PAGE_SIZE)); - bio_set_dev(&iclog->ic_bio, log->l_targ->bt_bdev); - iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart + bno; - iclog->ic_bio.bi_end_io = xlog_bio_end_io; - iclog->ic_bio.bi_private = iclog; - /* * We use REQ_SYNC | REQ_IDLE here to tell the block layer the are more * IOs coming immediately after this one. This prevents the block layer * writeback throttle from throttling log writes behind background * metadata writeback and causing priority inversions. 
*/ - iclog->ic_bio.bi_opf = REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_IDLE; + bio_init(&iclog->ic_bio, log->l_targ->bt_bdev, iclog->ic_bvec, + howmany(count, PAGE_SIZE), + REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_IDLE); + iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart + bno; + iclog->ic_bio.bi_end_io = xlog_bio_end_io; + iclog->ic_bio.bi_private = iclog; + if (iclog->ic_flags & XLOG_ICL_NEED_FLUSH) { iclog->ic_bio.bi_opf |= REQ_PREFLUSH; /* diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index c0fc2c326dcee..d331b52592a0a 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -1540,10 +1540,8 @@ static int zonefs_read_super(struct super_block *sb) if (!page) return -ENOMEM; - bio_init(&bio, &bio_vec, 1); + bio_init(&bio, sb->s_bdev, &bio_vec, 1, REQ_OP_READ); bio.bi_iter.bi_sector = 0; - bio.bi_opf = REQ_OP_READ; - bio_set_dev(&bio, sb->s_bdev); bio_add_page(&bio, page, PAGE_SIZE, 0); ret = submit_bio_wait(&bio); diff --git a/include/linux/bio.h b/include/linux/bio.h index be6ac92913d48..41bedf727f59c 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -456,8 +456,8 @@ static inline int bio_iov_vecs_to_alloc(struct iov_iter *iter, int max_segs) struct request_queue; extern int submit_bio_wait(struct bio *bio); -extern void bio_init(struct bio *bio, struct bio_vec *table, - unsigned short max_vecs); +void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table, + unsigned short max_vecs, unsigned int opf); extern void bio_uninit(struct bio *); extern void bio_reset(struct bio *); void bio_chain(struct bio *, struct bio *); -- GitLab From a7c50c940477bae89fb2b4f51bd969a2d95d7512 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:11:07 +0100 Subject: [PATCH 0199/1586] block: pass a block_device and opf to bio_reset Pass the block_device that we plan to use this bio for and the operation to bio_reset to optimize the assignment.
A NULL block_device can be passed, both for the passthrough case on a raw request_queue and to temporarily avoid refactoring some nasty code. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220124091107.642561-20-hch@lst.de Signed-off-by: Jens Axboe --- block/bio.c | 6 +++++- drivers/block/pktcdvd.c | 8 ++------ drivers/md/bcache/journal.c | 12 ++++-------- drivers/md/bcache/request.c | 4 ++-- drivers/md/raid1.c | 5 ++--- drivers/md/raid10.c | 8 +++----- drivers/md/raid5-cache.c | 9 +++------ drivers/md/raid5.c | 8 ++++---- fs/btrfs/disk-io.c | 4 +--- fs/crypto/bio.c | 8 ++------ include/linux/bio.h | 9 +-------- 11 files changed, 29 insertions(+), 52 deletions(-) diff --git a/block/bio.c b/block/bio.c index b2133d86e885e..03cefe81950f2 100644 --- a/block/bio.c +++ b/block/bio.c @@ -295,6 +295,8 @@ EXPORT_SYMBOL(bio_init); /** * bio_reset - reinitialize a bio * @bio: bio to reset + * @bdev: block device to use the bio for + * @opf: operation and flags for bio * * Description: * After calling bio_reset(), @bio will be in the same state as a freshly @@ -302,11 +304,13 @@ EXPORT_SYMBOL(bio_init); * preserved are the ones that are initialized by bio_alloc_bioset(). See * comment in struct bio. 
*/ -void bio_reset(struct bio *bio) +void bio_reset(struct bio *bio, struct block_device *bdev, unsigned int opf) { bio_uninit(bio); memset(bio, 0, BIO_RESET_BYTES); atomic_set(&bio->__bi_remaining, 1); + bio->bi_bdev = bdev; + bio->bi_opf = opf; } EXPORT_SYMBOL(bio_reset); diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 2b6b70a39e760..3aa5954429462 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -1020,9 +1020,8 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt) continue; bio = pkt->r_bios[f]; - bio_reset(bio); + bio_reset(bio, pd->bdev, REQ_OP_READ); bio->bi_iter.bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9); - bio_set_dev(bio, pd->bdev); bio->bi_end_io = pkt_end_io_read; bio->bi_private = pkt; @@ -1034,7 +1033,6 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt) BUG(); atomic_inc(&pkt->io_wait); - bio_set_op_attrs(bio, REQ_OP_READ, 0); pkt_queue_bio(pd, bio); frames_read++; } @@ -1235,9 +1233,8 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt) { int f; - bio_reset(pkt->w_bio); + bio_reset(pkt->w_bio, pd->bdev, REQ_OP_WRITE); pkt->w_bio->bi_iter.bi_sector = pkt->sector; - bio_set_dev(pkt->w_bio, pd->bdev); pkt->w_bio->bi_end_io = pkt_end_io_packet_write; pkt->w_bio->bi_private = pkt; @@ -1270,7 +1267,6 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt) /* Start the write request */ atomic_set(&pkt->io_wait, 1); - bio_set_op_attrs(pkt->w_bio, REQ_OP_WRITE, 0); pkt_queue_bio(pd, pkt->w_bio); } diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 6d26c5b06e2b6..7c2ca52ca3e43 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -53,14 +53,12 @@ static int journal_read_bucket(struct cache *ca, struct list_head *list, reread: left = ca->sb.bucket_size - offset; len = min_t(unsigned int, left, PAGE_SECTORS << JSET_BITS); - bio_reset(bio); + 
bio_reset(bio, ca->bdev, REQ_OP_READ); bio->bi_iter.bi_sector = bucket + offset; - bio_set_dev(bio, ca->bdev); bio->bi_iter.bi_size = len << 9; bio->bi_end_io = journal_read_endio; bio->bi_private = &cl; - bio_set_op_attrs(bio, REQ_OP_READ, 0); bch_bio_map(bio, data); closure_bio_submit(ca->set, bio, &cl); @@ -771,16 +769,14 @@ static void journal_write_unlocked(struct closure *cl) atomic_long_add(sectors, &ca->meta_sectors_written); - bio_reset(bio); + bio_reset(bio, ca->bdev, REQ_OP_WRITE | + REQ_SYNC | REQ_META | REQ_PREFLUSH | REQ_FUA); + bch_bio_map(bio, w->data); bio->bi_iter.bi_sector = PTR_OFFSET(k, i); - bio_set_dev(bio, ca->bdev); bio->bi_iter.bi_size = sectors << 9; bio->bi_end_io = journal_write_endio; bio->bi_private = w; - bio_set_op_attrs(bio, REQ_OP_WRITE, - REQ_SYNC|REQ_META|REQ_PREFLUSH|REQ_FUA); - bch_bio_map(bio, w->data); trace_bcache_journal_write(bio, w->data->keys); bio_list_add(&list, bio); diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index d4b98ebffd948..7ba59d08ed870 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -831,11 +831,11 @@ static void cached_dev_read_done(struct closure *cl) */ if (s->iop.bio) { - bio_reset(s->iop.bio); + bio_reset(s->iop.bio, s->cache_miss->bi_bdev, REQ_OP_READ); s->iop.bio->bi_iter.bi_sector = s->cache_miss->bi_iter.bi_sector; - bio_copy_dev(s->iop.bio, s->cache_miss); s->iop.bio->bi_iter.bi_size = s->insert_bio_sectors << 9; + bio_clone_blkg_association(s->iop.bio, s->cache_miss); bch_bio_map(s->iop.bio, NULL); bio_copy_data(s->cache_miss, s->iop.bio); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 43276f8fdc815..e7710fb5befb4 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2166,11 +2166,10 @@ static void process_checks(struct r1bio *r1_bio) continue; /* fixup the bio for reuse, but preserve errno */ status = b->bi_status; - bio_reset(b); + bio_reset(b, conf->mirrors[i].rdev->bdev, REQ_OP_READ); b->bi_status = status; 
b->bi_iter.bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset; - bio_set_dev(b, conf->mirrors[i].rdev->bdev); b->bi_end_io = end_sync_read; rp->raid_bio = r1_bio; b->bi_private = rp; @@ -2651,7 +2650,7 @@ static struct r1bio *raid1_alloc_init_r1buf(struct r1conf *conf) for (i = conf->poolinfo->raid_disks; i--; ) { bio = r1bio->bios[i]; rps = bio->bi_private; - bio_reset(bio); + bio_reset(bio, NULL, 0); bio->bi_private = rps; } r1bio->master_bio = NULL; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index cb7c58050708e..da07bcbc06d08 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2422,7 +2422,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) * bi_vecs, as the read request might have corrupted these */ rp = get_resync_pages(tbio); - bio_reset(tbio); + bio_reset(tbio, conf->mirrors[d].rdev->bdev, REQ_OP_WRITE); md_bio_reset_resync_pages(tbio, rp, fbio->bi_iter.bi_size); @@ -2430,7 +2430,6 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) tbio->bi_private = rp; tbio->bi_iter.bi_sector = r10_bio->devs[i].addr; tbio->bi_end_io = end_sync_write; - bio_set_op_attrs(tbio, REQ_OP_WRITE, 0); bio_copy_data(tbio, fbio); @@ -2441,7 +2440,6 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) if (test_bit(FailFast, &conf->mirrors[d].rdev->flags)) tbio->bi_opf |= MD_FAILFAST; tbio->bi_iter.bi_sector += conf->mirrors[d].rdev->data_offset; - bio_set_dev(tbio, conf->mirrors[d].rdev->bdev); submit_bio_noacct(tbio); } @@ -3160,12 +3158,12 @@ static struct r10bio *raid10_alloc_init_r10buf(struct r10conf *conf) for (i = 0; i < nalloc; i++) { bio = r10bio->devs[i].bio; rp = bio->bi_private; - bio_reset(bio); + bio_reset(bio, NULL, 0); bio->bi_private = rp; bio = r10bio->devs[i].repl_bio; if (bio) { rp = bio->bi_private; - bio_reset(bio); + bio_reset(bio, NULL, 0); bio->bi_private = rp; } } diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 
98b9ca11c28d8..86e2bb89d9c7b 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -1301,10 +1301,9 @@ void r5l_flush_stripe_to_raid(struct r5l_log *log) if (!do_flush) return; - bio_reset(&log->flush_bio); - bio_set_dev(&log->flush_bio, log->rdev->bdev); + bio_reset(&log->flush_bio, log->rdev->bdev, + REQ_OP_WRITE | REQ_PREFLUSH); log->flush_bio.bi_end_io = r5l_log_flush_endio; - log->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; submit_bio(&log->flush_bio); } @@ -1678,9 +1677,7 @@ static int r5l_recovery_fetch_ra_pool(struct r5l_log *log, struct r5l_recovery_ctx *ctx, sector_t offset) { - bio_reset(ctx->ra_bio); - bio_set_dev(ctx->ra_bio, log->rdev->bdev); - bio_set_op_attrs(ctx->ra_bio, REQ_OP_READ, 0); + bio_reset(ctx->ra_bio, log->rdev->bdev, REQ_OP_READ); ctx->ra_bio->bi_iter.bi_sector = log->rdev->data_offset + offset; ctx->valid_pages = 0; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index a9dcc5bc9c329..7c119208a2143 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2677,7 +2677,7 @@ static void raid5_end_read_request(struct bio * bi) (unsigned long long)sh->sector, i, atomic_read(&sh->count), bi->bi_status); if (i == disks) { - bio_reset(bi); + bio_reset(bi, NULL, 0); BUG(); return; } @@ -2785,7 +2785,7 @@ static void raid5_end_read_request(struct bio * bi) } } rdev_dec_pending(rdev, conf->mddev); - bio_reset(bi); + bio_reset(bi, NULL, 0); clear_bit(R5_LOCKED, &sh->dev[i].flags); set_bit(STRIPE_HANDLE, &sh->state); raid5_release_stripe(sh); @@ -2823,7 +2823,7 @@ static void raid5_end_write_request(struct bio *bi) (unsigned long long)sh->sector, i, atomic_read(&sh->count), bi->bi_status); if (i == disks) { - bio_reset(bi); + bio_reset(bi, NULL, 0); BUG(); return; } @@ -2860,7 +2860,7 @@ static void raid5_end_write_request(struct bio *bi) if (sh->batch_head && bi->bi_status && !replacement) set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state); - bio_reset(bi); + bio_reset(bi, NULL, 0); if 
(!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags)) clear_bit(R5_LOCKED, &sh->dev[i].flags); set_bit(STRIPE_HANDLE, &sh->state); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f45aa506f9a6f..505ba21230b1f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4154,10 +4154,8 @@ static void write_dev_flush(struct btrfs_device *device) return; #endif - bio_reset(bio); + bio_reset(bio, device->bdev, REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH); bio->bi_end_io = btrfs_end_empty_barrier; - bio_set_dev(bio, device->bdev); - bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH; init_completion(&device->flush_wait); bio->bi_private = &device->flush_wait; diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c index 755e985a42e0b..2217fe5ece6f9 100644 --- a/fs/crypto/bio.c +++ b/fs/crypto/bio.c @@ -80,9 +80,7 @@ static int fscrypt_zeroout_range_inline_crypt(const struct inode *inode, err = submit_bio_wait(bio); if (err) goto out; - bio_reset(bio); - bio_set_dev(bio, inode->i_sb->s_bdev); - bio->bi_opf = REQ_OP_WRITE; + bio_reset(bio, inode->i_sb->s_bdev, REQ_OP_WRITE); num_pages = 0; } } @@ -181,9 +179,7 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, err = submit_bio_wait(bio); if (err) goto out; - bio_reset(bio); - bio_set_dev(bio, inode->i_sb->s_bdev); - bio->bi_opf = REQ_OP_WRITE; + bio_reset(bio, inode->i_sb->s_bdev, REQ_OP_WRITE); } while (len != 0); err = 0; out: diff --git a/include/linux/bio.h b/include/linux/bio.h index 41bedf727f59c..18cfe5bb41ea8 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -459,7 +459,7 @@ extern int submit_bio_wait(struct bio *bio); void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table, unsigned short max_vecs, unsigned int opf); extern void bio_uninit(struct bio *); -extern void bio_reset(struct bio *); +void bio_reset(struct bio *bio, struct block_device *bdev, unsigned int opf); void bio_chain(struct bio *, struct bio *); int bio_add_page(struct bio *, struct page 
*, unsigned len, unsigned off); @@ -517,13 +517,6 @@ static inline void bio_set_dev(struct bio *bio, struct block_device *bdev) bio_associate_blkg(bio); } -static inline void bio_copy_dev(struct bio *dst, struct bio *src) -{ - bio_clear_flag(dst, BIO_REMAPPED); - dst->bi_bdev = src->bi_bdev; - bio_clone_blkg_association(dst, src); -} - /* * BIO list management for use by remapping drivers (e.g. DM or MD) and loop. * -- GitLab From b1f866b013e6e5583f2f0bf4a61d13eddb9a1799 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 27 Jan 2022 08:05:48 +0100 Subject: [PATCH 0200/1586] block: remove blk_needs_flush_plug blk_needs_flush_plug fails to account for the cb_list, which needs flushing as well. Remove it and just check if there is a plug instead of poking into the internals of the plug structure. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220127070549.1377856-1-hch@lst.de Signed-off-by: Jens Axboe --- fs/fs-writeback.c | 2 +- include/linux/blkdev.h | 13 ------------- kernel/exit.c | 2 +- kernel/sched/core.c | 2 +- 4 files changed, 3 insertions(+), 16 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index f8d7fe6db989e..f4ce38f6fc31c 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -2301,7 +2301,7 @@ void wakeup_flusher_threads(enum wb_reason reason) /* * If we are expecting writeback progress we must submit plugged IO. 
*/ - if (blk_needs_flush_plug(current)) + if (current->plug) blk_flush_plug(current->plug, true); rcu_read_lock(); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 99a4384bb8a56..f902a1c2fac0e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1055,14 +1055,6 @@ extern void blk_finish_plug(struct blk_plug *); void blk_flush_plug(struct blk_plug *plug, bool from_schedule); -static inline bool blk_needs_flush_plug(struct task_struct *tsk) -{ - struct blk_plug *plug = tsk->plug; - - return plug && - (plug->mq_list || !list_empty(&plug->cb_list)); -} - int blkdev_issue_flush(struct block_device *bdev); long nr_blockdev_pages(void); #else /* CONFIG_BLOCK */ @@ -1086,11 +1078,6 @@ static inline void blk_flush_plug(struct blk_plug *plug, bool async) { } -static inline bool blk_needs_flush_plug(struct task_struct *tsk) -{ - return false; -} - static inline int blkdev_issue_flush(struct block_device *bdev) { return 0; diff --git a/kernel/exit.c b/kernel/exit.c index b00a25bb4ab93..11fc6c9df9f28 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -735,7 +735,7 @@ void __noreturn do_exit(long code) struct task_struct *tsk = current; int group_dead; - WARN_ON(blk_needs_flush_plug(tsk)); + WARN_ON(tsk->plug); /* * If do_dead is called because this processes oopsed, it's possible diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 848eaa0efe0ea..3487bb92d1f2a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6344,7 +6344,7 @@ static inline void sched_submit_work(struct task_struct *tsk) * If we are going to sleep and we have plugged IO queued, * make sure to submit it to avoid deadlocks. 
*/ - if (blk_needs_flush_plug(tsk)) + if (tsk->plug) blk_flush_plug(tsk->plug, true); } -- GitLab From aa8dcccaf32bfdc09f2aff089d5d60c37da5b7b5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 27 Jan 2022 08:05:49 +0100 Subject: [PATCH 0201/1586] block: check that there is a plug in blk_flush_plug Rename blk_flush_plug to __blk_flush_plug and add a wrapper that includes the NULL check instead of open coding that check everywhere. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220127070549.1377856-2-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-core.c | 7 +++---- fs/fs-writeback.c | 6 ++---- include/linux/blkdev.h | 7 ++++++- kernel/sched/core.c | 7 ++----- 4 files changed, 13 insertions(+), 14 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index d93e3bb9a769b..61f6a0dc4511a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -991,8 +991,7 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags) !test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) return 0; - if (current->plug) - blk_flush_plug(current->plug, false); + blk_flush_plug(current->plug, false); if (blk_queue_enter(q, BLK_MQ_REQ_NOWAIT)) return 0; @@ -1274,7 +1273,7 @@ struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug, void *data, } EXPORT_SYMBOL(blk_check_plugged); -void blk_flush_plug(struct blk_plug *plug, bool from_schedule) +void __blk_flush_plug(struct blk_plug *plug, bool from_schedule) { if (!list_empty(&plug->cb_list)) flush_plug_callbacks(plug, from_schedule); @@ -1303,7 +1302,7 @@ void blk_flush_plug(struct blk_plug *plug, bool from_schedule) void blk_finish_plug(struct blk_plug *plug) { if (plug == current->plug) { - blk_flush_plug(plug, false); + __blk_flush_plug(plug, false); current->plug = NULL; } } diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index f4ce38f6fc31c..33d54c9fbefc0 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1903,8 +1903,7 @@ static long 
writeback_sb_inodes(struct super_block *sb, * unplug, so get our IOs out the door before we * give up the CPU. */ - if (current->plug) - blk_flush_plug(current->plug, false); + blk_flush_plug(current->plug, false); cond_resched(); } @@ -2301,8 +2300,7 @@ void wakeup_flusher_threads(enum wb_reason reason) /* * If we are expecting writeback progress we must submit plugged IO. */ - if (current->plug) - blk_flush_plug(current->plug, true); + blk_flush_plug(current->plug, true); rcu_read_lock(); list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f902a1c2fac0e..654163d3b9036 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1053,7 +1053,12 @@ extern void blk_start_plug(struct blk_plug *); extern void blk_start_plug_nr_ios(struct blk_plug *, unsigned short); extern void blk_finish_plug(struct blk_plug *); -void blk_flush_plug(struct blk_plug *plug, bool from_schedule); +void __blk_flush_plug(struct blk_plug *plug, bool from_schedule); +static inline void blk_flush_plug(struct blk_plug *plug, bool async) +{ + if (plug) + __blk_flush_plug(plug, async); +} int blkdev_issue_flush(struct block_device *bdev); long nr_blockdev_pages(void); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3487bb92d1f2a..46152982e4001 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6344,8 +6344,7 @@ static inline void sched_submit_work(struct task_struct *tsk) * If we are going to sleep and we have plugged IO queued, * make sure to submit it to avoid deadlocks. 
*/ - if (tsk->plug) - blk_flush_plug(tsk->plug, true); + blk_flush_plug(tsk->plug, true); } static void sched_update_worker(struct task_struct *tsk) @@ -8371,9 +8370,7 @@ int io_schedule_prepare(void) int old_iowait = current->in_iowait; current->in_iowait = 1; - if (current->plug) - blk_flush_plug(current->plug, true); - + blk_flush_plug(current->plug, true); return old_iowait; } -- GitLab From b42c1fc3d55e077d36718ad9800d89100b2aff81 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 27 Jan 2022 07:41:25 +0100 Subject: [PATCH 0202/1586] block: fix the kerneldoc for bio_end_io_acct Document the actually existing parameter name. Reported-by: Stephen Rothwell Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220127064125.1314347-1-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 654163d3b9036..3bfc75a2a4509 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1520,7 +1520,7 @@ void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time, /** * bio_end_io_acct - end I/O accounting for bio based drivers * @bio: bio to end account for - * @start: start time returned by bio_start_io_acct() + * @start_time: start time returned by bio_start_io_acct() */ static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time) { -- GitLab From 3767c902719befe44067717fa893a9ef8cb8747d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 27 Jan 2022 07:42:21 +0100 Subject: [PATCH 0203/1586] MAINTAINERS: add bio.h to the block section bio.h is part of the block layer, so list it in the MAINTAINERS file as such. 
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220127064221.1314477-1-hch@lst.de Signed-off-by: Jens Axboe --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index f41088418aae2..f88a16ba8b475 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3436,6 +3436,7 @@ F: Documentation/ABI/stable/sysfs-block F: Documentation/block/ F: block/ F: drivers/block/ +F: include/linux/bio.h F: include/linux/blk* F: kernel/trace/blktrace.c F: lib/sbitmap.c -- GitLab From 455a844d6345639be682977b3f0126be446b702e Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Fri, 28 Jan 2022 12:34:54 +0800 Subject: [PATCH 0204/1586] block: fix boolreturn.cocci warning Return statements in functions returning bool should use true/false instead of 1/0. ./block/bio.c:1081:9-10: WARNING: return of 0/1 in function 'bio_add_folio' with return type bool. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220128043454.68927-1-jiapeng.chong@linux.alibaba.com Signed-off-by: Jens Axboe --- block/bio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/bio.c b/block/bio.c index 03cefe81950f2..2e19ca600fcdb 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1078,7 +1078,7 @@ bool bio_add_folio(struct bio *bio, struct folio *folio, size_t len, size_t off) { if (len > UINT_MAX || off > UINT_MAX) - return 0; + return false; return bio_add_page(bio, &folio->page, len, off) > 0; } -- GitLab From 365ab499153cdb2007d54e7e62bcbf2c67f7ab8f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 28 Jan 2022 17:09:22 +0300 Subject: [PATCH 0205/1586] fs/ntfs3: remove unnecessary NULL check This code triggers a Smatch warning: fs/ntfs3/fsntfs.c:1606 ntfs_bio_fill_1() warn: variable dereferenced before check 'bio' (see line 1591) The "bio" pointer cannot be NULL so there is no need to check. 
Originally there was more extensive NULL checking but it was removed because bio_alloc() will never fail if it is allowed to sleep. Remove this check as well. Fixes: 39146b6f66ba ("ntfs3: remove ntfs_alloc_bio") Signed-off-by: Dan Carpenter Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220128140922.GA29766@kili Signed-off-by: Jens Axboe --- fs/ntfs3/fsntfs.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index 0660a07c5a96e..3de5700a9b833 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -1603,11 +1603,10 @@ new_bio: } } while (run_get_entry(run, ++run_idx, NULL, &lcn, &clen)); - if (bio) { - if (!err) - err = submit_bio_wait(bio); - bio_put(bio); - } + if (!err) + err = submit_bio_wait(bio); + bio_put(bio); + blk_finish_plug(&plug); out: unlock_page(fill); -- GitLab From 7030c428fae100c339436f5cb6f9e7c0574097ad Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 1 Feb 2022 21:05:59 +0100 Subject: [PATCH 0206/1586] spi: Replace acpi_bus_get_device() Replace acpi_bus_get_device() that is going to be dropped with acpi_fetch_acpi_dev(). No intentional functional impact. Signed-off-by: Rafael J. 
Wysocki Link: https://lore.kernel.org/r/2231987.ElGaqSPkdT@kreacher Signed-off-by: Mark Brown --- drivers/spi/spi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index ec9f2ed579e34..c98c55f44ee6c 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -2585,10 +2585,10 @@ static acpi_status acpi_register_spi_device(struct spi_controller *ctlr, static acpi_status acpi_spi_add_device(acpi_handle handle, u32 level, void *data, void **return_value) { + struct acpi_device *adev = acpi_fetch_acpi_dev(handle); struct spi_controller *ctlr = data; - struct acpi_device *adev; - if (acpi_bus_get_device(handle, &adev)) + if (!adev) return AE_OK; return acpi_register_spi_device(ctlr, adev); -- GitLab From 8d37f2710f022837635d9f97db3ac8c853e86979 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 2 Feb 2022 00:45:35 +0100 Subject: [PATCH 0207/1586] spi: mpc512x-psc: Fix compile errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit My patch created compilation bugs in the MPC512x-PSC driver. Fix them up. 
Cc: Uwe Kleine-König Cc: Anatolij Gustschin Cc: linuxppc-dev@lists.ozlabs.org Reported-by: kernel test robot Fixes: 2818824ced4b (" spi: mpc512x-psc: Convert to use GPIO descriptors") Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20220201234535.569973-1-linus.walleij@linaro.org Signed-off-by: Mark Brown --- drivers/spi/spi-mpc512x-psc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/spi/spi-mpc512x-psc.c b/drivers/spi/spi-mpc512x-psc.c index 8a488d8e4c1be..03630359ce70d 100644 --- a/drivers/spi/spi-mpc512x-psc.c +++ b/drivers/spi/spi-mpc512x-psc.c @@ -127,7 +127,7 @@ static void mpc512x_psc_spi_activate_cs(struct spi_device *spi) out_be32(psc_addr(mps, ccr), ccr); mps->bits_per_word = cs->bits_per_word; - if (cs->gpiod) { + if (spi->cs_gpiod) { if (mps->cs_control) /* boardfile override */ mps->cs_control(spi, (spi->mode & SPI_CS_HIGH) ? 1 : 0); @@ -373,7 +373,6 @@ static int mpc512x_psc_spi_unprep_xfer_hw(struct spi_master *master) static int mpc512x_psc_spi_setup(struct spi_device *spi) { struct mpc512x_psc_spi_cs *cs = spi->controller_state; - int ret; if (spi->bits_per_word % 8) return -EINVAL; -- GitLab From fe13889c390e14205e064d7e159e61eb5da4b1c3 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Fri, 28 Jan 2022 19:07:27 +0800 Subject: [PATCH 0208/1586] genirq, softirq: Use in_hardirq() instead of in_irq() Replace the obsolete and ambiguous macro in_irq() with the new macro in_hardirq(). 
Signed-off-by: Changbin Du Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220128110727.5110-1-changbin.du@gmail.com --- kernel/irq/irqdesc.c | 4 ++-- kernel/softirq.c | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 2267e6527db3c..6167d32e27da2 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -640,7 +640,7 @@ int handle_irq_desc(struct irq_desc *desc) return -EINVAL; data = irq_desc_get_irq_data(desc); - if (WARN_ON_ONCE(!in_irq() && handle_enforce_irqctx(data))) + if (WARN_ON_ONCE(!in_hardirq() && handle_enforce_irqctx(data))) return -EPERM; generic_handle_irq_desc(desc); @@ -676,7 +676,7 @@ EXPORT_SYMBOL_GPL(generic_handle_irq); */ int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq) { - WARN_ON_ONCE(!in_irq()); + WARN_ON_ONCE(!in_hardirq()); return handle_irq_desc(irq_resolve_mapping(domain, hwirq)); } EXPORT_SYMBOL_GPL(generic_handle_domain_irq); diff --git a/kernel/softirq.c b/kernel/softirq.c index 41f470929e991..fac801815554a 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -222,7 +222,7 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) u32 pending; int curcnt; - WARN_ON_ONCE(in_irq()); + WARN_ON_ONCE(in_hardirq()); lockdep_assert_irqs_enabled(); local_irq_save(flags); @@ -305,7 +305,7 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) { unsigned long flags; - WARN_ON_ONCE(in_irq()); + WARN_ON_ONCE(in_hardirq()); raw_local_irq_save(flags); /* @@ -352,14 +352,14 @@ static void __local_bh_enable(unsigned int cnt) */ void _local_bh_enable(void) { - WARN_ON_ONCE(in_irq()); + WARN_ON_ONCE(in_hardirq()); __local_bh_enable(SOFTIRQ_DISABLE_OFFSET); } EXPORT_SYMBOL(_local_bh_enable); void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) { - WARN_ON_ONCE(in_irq()); + WARN_ON_ONCE(in_hardirq()); lockdep_assert_irqs_enabled(); #ifdef CONFIG_TRACE_IRQFLAGS local_irq_disable(); @@ -618,7 +618,7 
@@ static inline void tick_irq_exit(void) /* Make sure that timer wheel updates are propagated */ if ((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) { - if (!in_irq()) + if (!in_hardirq()) tick_nohz_irq_exit(); } #endif -- GitLab From d2d8e896485a52554cea486816c171dc7240792e Mon Sep 17 00:00:00 2001 From: Konstantin Vyshetsky Date: Mon, 13 Dec 2021 17:12:03 -0800 Subject: [PATCH 0209/1586] f2fs: move discard parameters into discard_cmd_control This patch unifies parameters related to how often discard is issued and how many requests go out at the same time by placing them in discard_cmd_control. The move will allow the parameters to be modified in the future without relying on hard-coded values. Signed-off-by: Konstantin Vyshetsky Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 4 ++++ fs/f2fs/segment.c | 22 +++++++++++++--------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8178a9152e496..63c90416364bc 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -398,6 +398,10 @@ struct discard_cmd_control { struct mutex cmd_lock; unsigned int nr_discards; /* # of discards in the list */ unsigned int max_discards; /* max. discards to be issued */ + unsigned int max_discard_request; /* max. discard request per round */ + unsigned int min_discard_issue_time; /* min. interval between discard issue */ + unsigned int mid_discard_issue_time; /* mid. interval between discard issue */ + unsigned int max_discard_issue_time; /* max. 
interval between discard issue */ unsigned int discard_granularity; /* discard granularity */ unsigned int undiscard_blks; /* # of undiscard blocks */ unsigned int next_pos; /* next discard position */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 216538b573310..56211e201d51b 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1156,14 +1156,14 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, dpolicy->ordered = false; dpolicy->granularity = granularity; - dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; + dpolicy->max_requests = dcc->max_discard_request; dpolicy->io_aware_gran = MAX_PLIST_NUM; dpolicy->timeout = false; if (discard_type == DPOLICY_BG) { - dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; - dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME; - dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; + dpolicy->min_interval = dcc->min_discard_issue_time; + dpolicy->mid_interval = dcc->mid_discard_issue_time; + dpolicy->max_interval = dcc->max_discard_issue_time; dpolicy->io_aware = true; dpolicy->sync = false; dpolicy->ordered = true; @@ -1171,12 +1171,12 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, dpolicy->granularity = 1; if (atomic_read(&dcc->discard_cmd_cnt)) dpolicy->max_interval = - DEF_MIN_DISCARD_ISSUE_TIME; + dcc->min_discard_issue_time; } } else if (discard_type == DPOLICY_FORCE) { - dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; - dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME; - dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; + dpolicy->min_interval = dcc->min_discard_issue_time; + dpolicy->mid_interval = dcc->mid_discard_issue_time; + dpolicy->max_interval = dcc->max_discard_issue_time; dpolicy->io_aware = false; } else if (discard_type == DPOLICY_FSTRIM) { dpolicy->io_aware = false; @@ -1781,7 +1781,7 @@ static int issue_discard_thread(void *data) struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; wait_queue_head_t *q = &dcc->discard_wait_queue; struct discard_policy 
dpolicy; - unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME; + unsigned int wait_ms = dcc->min_discard_issue_time; int issued; set_freezable(); @@ -2180,6 +2180,10 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) atomic_set(&dcc->discard_cmd_cnt, 0); dcc->nr_discards = 0; dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg; + dcc->max_discard_request = DEF_MAX_DISCARD_REQUEST; + dcc->min_discard_issue_time = DEF_MIN_DISCARD_ISSUE_TIME; + dcc->mid_discard_issue_time = DEF_MID_DISCARD_ISSUE_TIME; + dcc->max_discard_issue_time = DEF_MAX_DISCARD_ISSUE_TIME; dcc->undiscard_blks = 0; dcc->next_pos = 0; dcc->root = RB_ROOT_CACHED; -- GitLab From b2e4a2b300e5e2042e8d92ec16fc124222b7ecc9 Mon Sep 17 00:00:00 2001 From: Konstantin Vyshetsky Date: Mon, 13 Dec 2021 17:12:43 -0800 Subject: [PATCH 0210/1586] f2fs: expose discard related parameters in sysfs This patch exposes max_discard_request, min_discard_issue_time, mid_discard_issue_time, and max_discard_issue_time in sysfs. This will allow the user to fine tune discard operations. Signed-off-by: Konstantin Vyshetsky Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 27 +++++++++++++++++++++++++ fs/f2fs/sysfs.c | 8 ++++++++ 2 files changed, 35 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 2416b03ff2837..87d3884c90ea6 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -98,6 +98,33 @@ Description: Controls the issue rate of discard commands that consist of small checkpoint is triggered, and issued during the checkpoint. By default, it is disabled with 0. +What: /sys/fs/f2fs//max_discard_request +Date: December 2021 +Contact: "Konstantin Vyshetsky" +Description: Controls the number of discards a thread will issue at a time. 
+ Higher number will allow the discard thread to finish its work + faster, at the cost of higher latency for incoming I/O. + +What: /sys/fs/f2fs//min_discard_issue_time +Date: December 2021 +Contact: "Konstantin Vyshetsky" +Description: Controls the interval the discard thread will wait between + issuing discard requests when there are discards to be issued and + no I/O aware interruptions occur. + +What: /sys/fs/f2fs//mid_discard_issue_time +Date: December 2021 +Contact: "Konstantin Vyshetsky" +Description: Controls the interval the discard thread will wait between + issuing discard requests when there are discards to be issued and + an I/O aware interruption occurs. + +What: /sys/fs/f2fs//max_discard_issue_time +Date: December 2021 +Contact: "Konstantin Vyshetsky" +Description: Controls the interval the discard thread will wait when there are + no discard operations to be issued. + What: /sys/fs/f2fs//discard_granularity Date: July 2017 Contact: "Chao Yu" diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 2bccdaedfb000..281bc0133ee6f 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -716,6 +716,10 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle, gc_mode); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_urgent, gc_mode); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards); +F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_discard_request, max_discard_request); +F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, min_discard_issue_time, min_discard_issue_time); +F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, mid_discard_issue_time, mid_discard_issue_time); +F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_discard_issue_time, max_discard_issue_time); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity); F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, 
trim_sections); @@ -832,6 +836,10 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(reclaim_segments), ATTR_LIST(main_blkaddr), ATTR_LIST(max_small_discards), + ATTR_LIST(max_discard_request), + ATTR_LIST(min_discard_issue_time), + ATTR_LIST(mid_discard_issue_time), + ATTR_LIST(max_discard_issue_time), ATTR_LIST(discard_granularity), ATTR_LIST(pending_discard), ATTR_LIST(batched_trim_sections), -- GitLab From 4e28b22225e3a8b7f1bd3e093301802a2238071a Mon Sep 17 00:00:00 2001 From: David Heidelberg Date: Wed, 2 Feb 2022 11:47:15 +0100 Subject: [PATCH 0211/1586] spi: dt-bindings: add missing description type to reg property Added missing description type. Fixes warning: Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml: properties:reg:items: 'anyOf' conditional failed Fixes: a708078eeb99 ("spi: Add Sunplus SP7021 schema") Signed-off-by: David Heidelberg Link: https://lore.kernel.org/r/20220202104715.27839-1-david@ixit.cz Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml b/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml index 24382cdda6455..38589fdbc80db 100644 --- a/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml +++ b/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml @@ -20,8 +20,8 @@ properties: reg: items: - - the SPI master registers - - the SPI slave registers + - description: the SPI master registers + - description: the SPI slave registers reg-names: items: -- GitLab From 4a960e8941bd59fe20f8f774de371f40f222a0c7 Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Thu, 27 Jan 2022 11:34:48 -0800 Subject: [PATCH 0212/1586] x86/Documentation: Describe the Intel Hardware Feedback Interface Start a documentation file to describe the purpose and operation of Intel's Hardware Feedback Interface. 
Describe how this interface is used in Linux to relay performance and energy efficiency updates to userspace. Reviewed-by: Len Brown Suggested-by: Srinivas Pandruvada Signed-off-by: Ricardo Neri Signed-off-by: Rafael J. Wysocki --- Documentation/x86/index.rst | 1 + Documentation/x86/intel-hfi.rst | 72 +++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 Documentation/x86/intel-hfi.rst diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst index f498f1d36cd3c..982c8af853b9f 100644 --- a/Documentation/x86/index.rst +++ b/Documentation/x86/index.rst @@ -21,6 +21,7 @@ x86-specific Documentation tlb mtrr pat + intel-hfi intel-iommu intel_txt amd-memory-encryption diff --git a/Documentation/x86/intel-hfi.rst b/Documentation/x86/intel-hfi.rst new file mode 100644 index 0000000000000..49dea58ea4fb2 --- /dev/null +++ b/Documentation/x86/intel-hfi.rst @@ -0,0 +1,72 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================================================ +Hardware-Feedback Interface for scheduling on Intel Hardware +============================================================ + +Overview +-------- + +Intel has described the Hardware Feedback Interface (HFI) in the Intel 64 and +IA-32 Architectures Software Developer's Manual (Intel SDM) Volume 3 Section +14.6 [1]_. + +The HFI gives the operating system a performance and energy efficiency +capability data for each CPU in the system. Linux can use the information from +the HFI to influence task placement decisions. + +The Hardware Feedback Interface +------------------------------- + +The Hardware Feedback Interface provides to the operating system information +about the performance and energy efficiency of each CPU in the system. Each +capability is given as a unit-less quantity in the range [0-255]. Higher values +indicate higher capability. Energy efficiency and performance are reported in +separate capabilities. 
Even though on some systems these two metrics may be +related, they are specified as independent capabilities in the Intel SDM. + +These capabilities may change at runtime as a result of changes in the +operating conditions of the system or the action of external factors. The rate +at which these capabilities are updated is specific to each processor model. On +some models, capabilities are set at boot time and never change. On others, +capabilities may change every tens of milliseconds. For instance, a remote +mechanism may be used to lower Thermal Design Power. Such change can be +reflected in the HFI. Likewise, if the system needs to be throttled due to +excessive heat, the HFI may reflect reduced performance on specific CPUs. + +The kernel or a userspace policy daemon can use these capabilities to modify +task placement decisions. For instance, if either the performance or energy +capabilities of a given logical processor becomes zero, it is an indication that +the hardware recommends to the operating system to not schedule any tasks on +that processor for performance or energy efficiency reasons, respectively. + +Implementation details for Linux +-------------------------------- + +The infrastructure to handle thermal event interrupts has two parts. In the +Local Vector Table of a CPU's local APIC, there exists a register for the +Thermal Monitor Register. This register controls how interrupts are delivered +to a CPU when the thermal monitor generates an interrupt. Further details +can be found in the Intel SDM Vol. 3 Section 10.5 [1]_. + +The thermal monitor may generate interrupts per CPU or per package. The HFI +generates package-level interrupts. This monitor is configured and initialized +via a set of machine-specific registers. Specifically, the HFI interrupt and +status are controlled via designated bits in the IA32_PACKAGE_THERM_INTERRUPT +and IA32_PACKAGE_THERM_STATUS registers, respectively. There exists one HFI +table per package. 
Further details can be found in the Intel SDM Vol. 3 +Section 14.9 [1]_. + +The hardware issues an HFI interrupt after updating the HFI table and is ready +for the operating system to consume it. CPUs receive such interrupt via the +thermal entry in the Local APIC's Local Vector Table. + +When servicing such interrupt, the HFI driver parses the updated table and +relays the update to userspace using the thermal notification framework. Given +that there may be many HFI updates every second, the updates relayed to +userspace are throttled at a rate of CONFIG_HZ jiffies. + +References +---------- + +.. [1] https://www.intel.com/sdm -- GitLab From 7b8f40b3de75c971a4e5f9308b06deb59118dbac Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Thu, 27 Jan 2022 11:34:49 -0800 Subject: [PATCH 0213/1586] x86/cpu: Add definitions for the Intel Hardware Feedback Interface Add the CPUID feature bit and the model-specific registers needed to identify and configure the Intel Hardware Feedback Interface. Acked-by: Borislav Petkov Signed-off-by: Ricardo Neri Signed-off-by: Rafael J. Wysocki --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 6db4e2932b3d8..b39f510128f3a 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -330,6 +330,7 @@ #define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ #define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. 
Preference */ #define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ +#define X86_FEATURE_HFI (14*32+19) /* Hardware Feedback Interface */ /* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */ #define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 3faf0f97edb1b..89e0a1d5ee36d 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -704,12 +704,14 @@ #define PACKAGE_THERM_STATUS_PROCHOT (1 << 0) #define PACKAGE_THERM_STATUS_POWER_LIMIT (1 << 10) +#define PACKAGE_THERM_STATUS_HFI_UPDATED (1 << 26) #define MSR_IA32_PACKAGE_THERM_INTERRUPT 0x000001b2 #define PACKAGE_THERM_INT_HIGH_ENABLE (1 << 0) #define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1) #define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24) +#define PACKAGE_THERM_INT_HFI_ENABLE (1 << 25) /* Thermal Thresholds Support */ #define THERM_INT_THRESHOLD0_ENABLE (1 << 15) @@ -958,4 +960,8 @@ #define MSR_VM_IGNNE 0xc0010115 #define MSR_VM_HSAVE_PA 0xc0010117 +/* Hardware Feedback Interface */ +#define MSR_IA32_HW_FEEDBACK_PTR 0x17d0 +#define MSR_IA32_HW_FEEDBACK_CONFIG 0x17d1 + #endif /* _ASM_X86_MSR_INDEX_H */ -- GitLab From 1cb19cabeb0e187b6c244d0da73d27f7432c40dc Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Thu, 27 Jan 2022 11:34:50 -0800 Subject: [PATCH 0214/1586] thermal: intel: hfi: Minimally initialize the Hardware Feedback Interface The Intel Hardware Feedback Interface provides guidance to the operating system about the performance and energy efficiency capabilities of each CPU in the system. Capabilities are numbers between 0 and 255 where a higher number represents a higher capability. For each CPU, energy efficiency and performance are reported as separate capabilities. Hardware computes these capabilities based on the operating conditions of the system such as power and thermal limits. 
These capabilities are shared with the operating system in a table resident in memory. Each package in the system has its own HFI instance. Every logical CPU in the package is represented in the table. More than one logical CPUs may be represented in a single table entry. When the hardware updates the table, it generates a package-level thermal interrupt. The size and format of the HFI table depend on the supported features and can only be determined at runtime. To minimally initialize the HFI, parse its features and allocate one instance per package of a data structure with the necessary parameters to read and navigate a local copy (i.e., owned by the driver) of individual HFI tables. A subsequent changeset will provide per-CPU initialization and interrupt handling. Reviewed-by: Len Brown Co-developed by: Aubrey Li Signed-off-by: Aubrey Li Signed-off-by: Ricardo Neri Signed-off-by: Rafael J. Wysocki --- drivers/thermal/intel/Kconfig | 12 ++ drivers/thermal/intel/Makefile | 1 + drivers/thermal/intel/intel_hfi.c | 181 ++++++++++++++++++++++++++++ drivers/thermal/intel/intel_hfi.h | 11 ++ drivers/thermal/intel/therm_throt.c | 3 + 5 files changed, 208 insertions(+) create mode 100644 drivers/thermal/intel/intel_hfi.c create mode 100644 drivers/thermal/intel/intel_hfi.h diff --git a/drivers/thermal/intel/Kconfig b/drivers/thermal/intel/Kconfig index c83ea5d04a1da..e9d2925227d4f 100644 --- a/drivers/thermal/intel/Kconfig +++ b/drivers/thermal/intel/Kconfig @@ -99,3 +99,15 @@ config INTEL_MENLOW Intel Menlow platform. If unsure, say N. + +config INTEL_HFI_THERMAL + bool "Intel Hardware Feedback Interface" + depends on CPU_SUP_INTEL + depends on X86_THERMAL_VECTOR + help + Select this option to enable the Hardware Feedback Interface. If + selected, hardware provides guidance to the operating system on + the performance and energy efficiency capabilities of each CPU. 
+ These capabilities may change as a result of changes in the operating + conditions of the system such as power and thermal limits. If selected, + the kernel relays updates in CPUs' capabilities to userspace. diff --git a/drivers/thermal/intel/Makefile b/drivers/thermal/intel/Makefile index 960b56268b4a5..9a8d8054f3166 100644 --- a/drivers/thermal/intel/Makefile +++ b/drivers/thermal/intel/Makefile @@ -13,3 +13,4 @@ obj-$(CONFIG_INTEL_PCH_THERMAL) += intel_pch_thermal.o obj-$(CONFIG_INTEL_TCC_COOLING) += intel_tcc_cooling.o obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o obj-$(CONFIG_INTEL_MENLOW) += intel_menlow.o +obj-$(CONFIG_INTEL_HFI_THERMAL) += intel_hfi.o diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c new file mode 100644 index 0000000000000..969f4a1509730 --- /dev/null +++ b/drivers/thermal/intel/intel_hfi.c @@ -0,0 +1,181 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Hardware Feedback Interface Driver + * + * Copyright (c) 2021, Intel Corporation. + * + * Authors: Aubrey Li + * Ricardo Neri + * + * + * The Hardware Feedback Interface provides performance and energy efficiency + * capability information for each CPU in the system. Depending on the processor + * model, hardware may periodically update these capabilities as a result of + * changes in the operating conditions (e.g., power limits or thermal + * constraints). On other processor models, there is a single HFI update + * at boot. + * + * This file provides functionality to process HFI updates and relay these + * updates to userspace.
+ */ + +#define pr_fmt(fmt) "intel-hfi: " fmt + +#include +#include +#include +#include +#include +#include +#include + +#include "intel_hfi.h" + +/* CPUID detection and enumeration definitions for HFI */ + +#define CPUID_HFI_LEAF 6 + +union hfi_capabilities { + struct { + u8 performance:1; + u8 energy_efficiency:1; + u8 __reserved:6; + } split; + u8 bits; +}; + +union cpuid6_edx { + struct { + union hfi_capabilities capabilities; + u32 table_pages:4; + u32 __reserved:4; + s32 index:16; + } split; + u32 full; +}; + +/** + * struct hfi_cpu_data - HFI capabilities per CPU + * @perf_cap: Performance capability + * @ee_cap: Energy efficiency capability + * + * Capabilities of a logical processor in the HFI table. These capabilities are + * unitless. + */ +struct hfi_cpu_data { + u8 perf_cap; + u8 ee_cap; +} __packed; + +/** + * struct hfi_hdr - Header of the HFI table + * @perf_updated: Hardware updated performance capabilities + * @ee_updated: Hardware updated energy efficiency capabilities + * + * Properties of the data in an HFI table. + */ +struct hfi_hdr { + u8 perf_updated; + u8 ee_updated; +} __packed; + +/** + * struct hfi_instance - Representation of an HFI instance (i.e., a table) + * @local_table: Base of the local copy of the HFI table + * @timestamp: Timestamp of the last update of the local table. + * Located at the base of the local table. + * @hdr: Base address of the header of the local table + * @data: Base address of the data of the local table + * + * A set of parameters to parse and navigate a specific HFI table. 
+ */ +struct hfi_instance { + union { + void *local_table; + u64 *timestamp; + }; + void *hdr; + void *data; +}; + +/** + * struct hfi_features - Supported HFI features + * @nr_table_pages: Size of the HFI table in 4KB pages + * @cpu_stride: Stride size to locate the capability data of a logical + * processor within the table (i.e., row stride) + * @hdr_size: Size of the table header + * + * Parameters and supported features that are common to all HFI instances + */ +struct hfi_features { + unsigned int nr_table_pages; + unsigned int cpu_stride; + unsigned int hdr_size; +}; + +static int max_hfi_instances; +static struct hfi_instance *hfi_instances; + +static struct hfi_features hfi_features; + +static __init int hfi_parse_features(void) +{ + unsigned int nr_capabilities; + union cpuid6_edx edx; + + if (!boot_cpu_has(X86_FEATURE_HFI)) + return -ENODEV; + + /* + * If we are here we know that CPUID_HFI_LEAF exists. Parse the + * supported capabilities and the size of the HFI table. + */ + edx.full = cpuid_edx(CPUID_HFI_LEAF); + + if (!edx.split.capabilities.split.performance) { + pr_debug("Performance reporting not supported! Not using HFI\n"); + return -ENODEV; + } + + /* + * The number of supported capabilities determines the number of + * columns in the HFI table. Exclude the reserved bits. + */ + edx.split.capabilities.split.__reserved = 0; + nr_capabilities = hweight8(edx.split.capabilities.bits); + + /* The number of 4KB pages required by the table */ + hfi_features.nr_table_pages = edx.split.table_pages + 1; + + /* + * The header contains change indications for each supported feature. + * The size of the table header is rounded up to be a multiple of 8 + * bytes. + */ + hfi_features.hdr_size = DIV_ROUND_UP(nr_capabilities, 8) * 8; + + /* + * Data of each logical processor is also rounded up to be a multiple + * of 8 bytes. 
+ */ + hfi_features.cpu_stride = DIV_ROUND_UP(nr_capabilities, 8) * 8; + + return 0; +} + +void __init intel_hfi_init(void) +{ + if (hfi_parse_features()) + return; + + /* There is one HFI instance per die/package. */ + max_hfi_instances = topology_max_packages() * + topology_max_die_per_package(); + + /* + * This allocation may fail. CPU hotplug callbacks must check + * for a null pointer. + */ + hfi_instances = kcalloc(max_hfi_instances, sizeof(*hfi_instances), + GFP_KERNEL); +} diff --git a/drivers/thermal/intel/intel_hfi.h b/drivers/thermal/intel/intel_hfi.h new file mode 100644 index 0000000000000..05f748b48a4e4 --- /dev/null +++ b/drivers/thermal/intel/intel_hfi.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _INTEL_HFI_H +#define _INTEL_HFI_H + +#if defined(CONFIG_INTEL_HFI_THERMAL) +void __init intel_hfi_init(void); +#else +static inline void intel_hfi_init(void) { } +#endif /* CONFIG_INTEL_HFI_THERMAL */ + +#endif /* _INTEL_HFI_H */ diff --git a/drivers/thermal/intel/therm_throt.c b/drivers/thermal/intel/therm_throt.c index dab7e8fb10598..ac408714d52b8 100644 --- a/drivers/thermal/intel/therm_throt.c +++ b/drivers/thermal/intel/therm_throt.c @@ -32,6 +32,7 @@ #include #include +#include "intel_hfi.h" #include "thermal_interrupt.h" /* How long to wait between reporting thermal events */ @@ -509,6 +510,8 @@ static __init int thermal_throttle_init_device(void) if (!atomic_read(&therm_throt_en)) return 0; + intel_hfi_init(); + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/therm:online", thermal_throttle_online, thermal_throttle_offline); -- GitLab From 2d74e6319abe278981e79166b6c2d0c3ed39b1ae Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Thu, 27 Jan 2022 11:34:51 -0800 Subject: [PATCH 0215/1586] thermal: intel: hfi: Handle CPU hotplug events All CPUs in a package are represented in an HFI table. There exists an HFI table per package. Thus, CPUs in a package need to coordinate to initialize and access the table. 
Do such coordination during CPU hotplug. Use the first CPU to come online in a package to initialize the HFI instance and the data structure representing it. Other CPUs in the same package need only to register or unregister themselves in that data structure. The HFI depends on both the package-level thermal management and the local APIC thermal local vector. Thus, to ensure that a CPU coming online has an associated HFI instance when the hardware issues an HFI event, enable the HFI only after having enabled the local APIC thermal vector. The thermal throttle driver takes care of the needed package-level initialization. Reviewed-by: Len Brown Signed-off-by: Ricardo Neri Signed-off-by: Rafael J. Wysocki --- drivers/thermal/intel/intel_hfi.c | 205 ++++++++++++++++++++++++++++ drivers/thermal/intel/intel_hfi.h | 4 + drivers/thermal/intel/therm_throt.c | 9 ++ 3 files changed, 218 insertions(+) diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c index 969f4a1509730..d36858474f746 100644 --- a/drivers/thermal/intel/intel_hfi.c +++ b/drivers/thermal/intel/intel_hfi.c @@ -23,14 +23,24 @@ #include #include +#include +#include +#include #include +#include +#include #include #include #include #include +#include + #include "intel_hfi.h" +/* Hardware Feedback Interface MSR configuration bits */ +#define HW_FEEDBACK_PTR_VALID_BIT BIT(0) + /* CPUID detection and enumeration definitions for HFI */ #define CPUID_HFI_LEAF 6 @@ -86,6 +96,8 @@ struct hfi_hdr { * Located at the base of the local table. * @hdr: Base address of the header of the local table * @data: Base address of the data of the local table + * @cpus: CPUs represented in this HFI table instance + * @hw_table: Pointer to the HFI table of this instance * * A set of parameters to parse and navigate a specific HFI table. 
*/ @@ -96,6 +108,8 @@ struct hfi_instance { }; void *hdr; void *data; + cpumask_var_t cpus; + void *hw_table; }; /** @@ -113,10 +127,179 @@ struct hfi_features { unsigned int hdr_size; }; +/** + * struct hfi_cpu_info - Per-CPU attributes to consume HFI data + * @index: Row of this CPU in its HFI table + * @hfi_instance: Attributes of the HFI table to which this CPU belongs + * + * Parameters to link a logical processor to an HFI table and a row within it. + */ +struct hfi_cpu_info { + s16 index; + struct hfi_instance *hfi_instance; +}; + +static DEFINE_PER_CPU(struct hfi_cpu_info, hfi_cpu_info) = { .index = -1 }; + static int max_hfi_instances; static struct hfi_instance *hfi_instances; static struct hfi_features hfi_features; +static DEFINE_MUTEX(hfi_instance_lock); + +static void init_hfi_cpu_index(struct hfi_cpu_info *info) +{ + union cpuid6_edx edx; + + /* Do not re-read @cpu's index if it has already been initialized. */ + if (info->index > -1) + return; + + edx.full = cpuid_edx(CPUID_HFI_LEAF); + info->index = edx.split.index; +} + +/* + * The format of the HFI table depends on the number of capabilities that the + * hardware supports. Keep a data structure to navigate the table. + */ +static void init_hfi_instance(struct hfi_instance *hfi_instance) +{ + /* The HFI header is below the time-stamp. */ + hfi_instance->hdr = hfi_instance->local_table + + sizeof(*hfi_instance->timestamp); + + /* The HFI data starts below the header. */ + hfi_instance->data = hfi_instance->hdr + hfi_features.hdr_size; +} + +/** + * intel_hfi_online() - Enable HFI on @cpu + * @cpu: CPU in which the HFI will be enabled + * + * Enable the HFI to be used in @cpu. The HFI is enabled at the die/package + * level. The first CPU in the die/package to come online does the full HFI + * initialization. Subsequent CPUs will just link themselves to the HFI + * instance of their die/package. 
+ * + * This function is called before enabling the thermal vector in the local APIC + * in order to ensure that @cpu has an associated HFI instance when it receives + * an HFI event. + */ +void intel_hfi_online(unsigned int cpu) +{ + struct hfi_instance *hfi_instance; + struct hfi_cpu_info *info; + phys_addr_t hw_table_pa; + u64 msr_val; + u16 die_id; + + /* Nothing to do if hfi_instances are missing. */ + if (!hfi_instances) + return; + + /* + * Link @cpu to the HFI instance of its package/die. It does not + * matter whether the instance has been initialized. + */ + info = &per_cpu(hfi_cpu_info, cpu); + die_id = topology_logical_die_id(cpu); + hfi_instance = info->hfi_instance; + if (!hfi_instance) { + if (die_id < 0 || die_id >= max_hfi_instances) + return; + + hfi_instance = &hfi_instances[die_id]; + info->hfi_instance = hfi_instance; + } + + init_hfi_cpu_index(info); + + /* + * Now check if the HFI instance of the package/die of @cpu has been + * initialized (by checking its header). In such case, all we have to + * do is to add @cpu to this instance's cpumask. + */ + mutex_lock(&hfi_instance_lock); + if (hfi_instance->hdr) { + cpumask_set_cpu(cpu, hfi_instance->cpus); + goto unlock; + } + + /* + * Hardware is programmed with the physical address of the first page + * frame of the table. Hence, the allocated memory must be page-aligned. + */ + hfi_instance->hw_table = alloc_pages_exact(hfi_features.nr_table_pages << PAGE_SHIFT, + GFP_KERNEL | __GFP_ZERO); + if (!hfi_instance->hw_table) + goto unlock; + + hw_table_pa = virt_to_phys(hfi_instance->hw_table); + + /* + * Allocate memory to keep a local copy of the table that + * hardware generates. + */ + hfi_instance->local_table = kzalloc(hfi_features.nr_table_pages << PAGE_SHIFT, + GFP_KERNEL); + if (!hfi_instance->local_table) + goto free_hw_table; + + /* + * Program the address of the feedback table of this die/package.
On + * some processors, hardware remembers the old address of the HFI table + * even after having been reprogrammed and re-enabled. Thus, do not free + * the pages allocated for the table or reprogram the hardware with a + * new base address. Namely, program the hardware only once. + */ + msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT; + wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val); + + init_hfi_instance(hfi_instance); + + cpumask_set_cpu(cpu, hfi_instance->cpus); + +unlock: + mutex_unlock(&hfi_instance_lock); + return; + +free_hw_table: + free_pages_exact(hfi_instance->hw_table, hfi_features.nr_table_pages << PAGE_SHIFT); + goto unlock; +} + +/** + * intel_hfi_offline() - Disable HFI on @cpu + * @cpu: CPU in which the HFI will be disabled + * + * Remove @cpu from those covered by its HFI instance. + * + * On some processors, hardware remembers previous programming settings even + * after being reprogrammed. Thus, keep HFI enabled even if all CPUs in the + * die/package of @cpu are offline. See note in intel_hfi_online(). + */ +void intel_hfi_offline(unsigned int cpu) +{ + struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, cpu); + struct hfi_instance *hfi_instance; + + /* + * Check if @cpu has an associated, initialized (i.e., with a non-NULL + * header) HFI instance. HFI instances are only initialized if + * X86_FEATURE_HFI is present.
+ */ + hfi_instance = info->hfi_instance; + if (!hfi_instance) + return; + + if (!hfi_instance->hdr) + return; + + mutex_lock(&hfi_instance_lock); + cpumask_clear_cpu(cpu, hfi_instance->cpus); + mutex_unlock(&hfi_instance_lock); +} static __init int hfi_parse_features(void) { @@ -165,6 +348,9 @@ static __init int hfi_parse_features(void) void __init intel_hfi_init(void) { + struct hfi_instance *hfi_instance; + int i, j; + if (hfi_parse_features()) return; @@ -178,4 +364,23 @@ void __init intel_hfi_init(void) */ hfi_instances = kcalloc(max_hfi_instances, sizeof(*hfi_instances), GFP_KERNEL); + if (!hfi_instances) + return; + + for (i = 0; i < max_hfi_instances; i++) { + hfi_instance = &hfi_instances[i]; + if (!zalloc_cpumask_var(&hfi_instance->cpus, GFP_KERNEL)) + goto err_nomem; + } + + return; + +err_nomem: + for (j = 0; j < i; ++j) { + hfi_instance = &hfi_instances[j]; + free_cpumask_var(hfi_instance->cpus); + } + + kfree(hfi_instances); + hfi_instances = NULL; } diff --git a/drivers/thermal/intel/intel_hfi.h b/drivers/thermal/intel/intel_hfi.h index 05f748b48a4e4..56c6b2d752027 100644 --- a/drivers/thermal/intel/intel_hfi.h +++ b/drivers/thermal/intel/intel_hfi.h @@ -4,8 +4,12 @@ #if defined(CONFIG_INTEL_HFI_THERMAL) void __init intel_hfi_init(void); +void intel_hfi_online(unsigned int cpu); +void intel_hfi_offline(unsigned int cpu); #else static inline void intel_hfi_init(void) { } +static inline void intel_hfi_online(unsigned int cpu) { } +static inline void intel_hfi_offline(unsigned int cpu) { } #endif /* CONFIG_INTEL_HFI_THERMAL */ #endif /* _INTEL_HFI_H */ diff --git a/drivers/thermal/intel/therm_throt.c b/drivers/thermal/intel/therm_throt.c index ac408714d52b8..8571e57a013a3 100644 --- a/drivers/thermal/intel/therm_throt.c +++ b/drivers/thermal/intel/therm_throt.c @@ -476,6 +476,13 @@ static int thermal_throttle_online(unsigned int cpu) INIT_DELAYED_WORK(&state->package_throttle.therm_work, throttle_active_work); 
INIT_DELAYED_WORK(&state->core_throttle.therm_work, throttle_active_work); + /* + * The first CPU coming online will enable the HFI. Usually this causes + * hardware to issue an HFI thermal interrupt. Such interrupt will reach + * the CPU once we enable the thermal vector in the local APIC. + */ + intel_hfi_online(cpu); + /* Unmask the thermal vector after the above workqueues are initialized. */ l = apic_read(APIC_LVTTHMR); apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); @@ -493,6 +500,8 @@ static int thermal_throttle_offline(unsigned int cpu) l = apic_read(APIC_LVTTHMR); apic_write(APIC_LVTTHMR, l | APIC_LVT_MASKED); + intel_hfi_offline(cpu); + cancel_delayed_work_sync(&state->package_throttle.therm_work); cancel_delayed_work_sync(&state->core_throttle.therm_work); -- GitLab From ab09b0744a9944cbdc0ac9a5cb00bef72adf79d5 Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Thu, 27 Jan 2022 11:34:52 -0800 Subject: [PATCH 0216/1586] thermal: intel: hfi: Enable notification interrupt When hardware wants to inform the operating system about updates in the HFI table, it issues a package-level thermal event interrupt. For this, hardware has new interrupt and status bits in the IA32_PACKAGE_THERM_ INTERRUPT and IA32_PACKAGE_THERM_STATUS registers. The existing thermal throttle driver already handles thermal event interrupts: it initializes the thermal vector of the local APIC as well as per-CPU and package-level interrupt reporting. It also provides routines to service such interrupts. Extend its functionality to also handle HFI interrupts. The frequency of the thermal HFI interrupt is specific to each processor model. On some processors, a single interrupt happens as soon as the HFI is enabled and hardware will never update HFI capabilities afterwards. On other processors, thermal and power constraints may cause thermal HFI interrupts every tens of milliseconds. 
To not overwhelm consumers of the HFI data, use delayed work to throttle the rate at which HFI updates are processed. Use a dedicated workqueue to not overload system_wq if hardware issues many HFI updates. Reviewed-by: Len Brown Signed-off-by: Ricardo Neri Signed-off-by: Rafael J. Wysocki --- drivers/thermal/intel/intel_hfi.c | 110 ++++++++++++++++++++++++++++ drivers/thermal/intel/intel_hfi.h | 2 + drivers/thermal/intel/therm_throt.c | 10 +++ 3 files changed, 122 insertions(+) diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c index d36858474f746..7a2b750eecc45 100644 --- a/drivers/thermal/intel/intel_hfi.c +++ b/drivers/thermal/intel/intel_hfi.c @@ -26,20 +26,28 @@ #include #include #include +#include #include #include #include #include #include #include +#include +#include #include +#include #include #include "intel_hfi.h" +#define THERM_STATUS_CLEAR_PKG_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | \ + BIT(9) | BIT(11) | BIT(26)) + /* Hardware Feedback Interface MSR configuration bits */ #define HW_FEEDBACK_PTR_VALID_BIT BIT(0) +#define HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT BIT(0) /* CPUID detection and enumeration definitions for HFI */ @@ -98,6 +106,9 @@ struct hfi_hdr { * @data: Base address of the data of the local table * @cpus: CPUs represented in this HFI table instance * @hw_table: Pointer to the HFI table of this instance + * @update_work: Delayed work to process HFI updates + * @table_lock: Lock to protect accesses to the table of this instance + * @event_lock: Lock to process HFI interrupts * * A set of parameters to parse and navigate a specific HFI table.
*/ @@ -110,6 +121,9 @@ struct hfi_instance { void *data; cpumask_var_t cpus; void *hw_table; + struct delayed_work update_work; + raw_spinlock_t table_lock; + raw_spinlock_t event_lock; }; /** @@ -147,6 +161,86 @@ static struct hfi_instance *hfi_instances; static struct hfi_features hfi_features; static DEFINE_MUTEX(hfi_instance_lock); +static struct workqueue_struct *hfi_updates_wq; +#define HFI_UPDATE_INTERVAL HZ + +static void hfi_update_work_fn(struct work_struct *work) +{ + struct hfi_instance *hfi_instance; + + hfi_instance = container_of(to_delayed_work(work), struct hfi_instance, + update_work); + if (!hfi_instance) + return; + + /* TODO: Consume update here. */ +} + +void intel_hfi_process_event(__u64 pkg_therm_status_msr_val) +{ + struct hfi_instance *hfi_instance; + int cpu = smp_processor_id(); + struct hfi_cpu_info *info; + u64 new_timestamp; + + if (!pkg_therm_status_msr_val) + return; + + info = &per_cpu(hfi_cpu_info, cpu); + if (!info) + return; + + /* + * A CPU is linked to its HFI instance before the thermal vector in the + * local APIC is unmasked. Hence, info->hfi_instance cannot be NULL + * when receiving an HFI event. + */ + hfi_instance = info->hfi_instance; + if (unlikely(!hfi_instance)) { + pr_debug("Received event on CPU %d but instance was null", cpu); + return; + } + + /* + * On most systems, all CPUs in the package receive a package-level + * thermal interrupt when there is an HFI update. It is sufficient to + * let a single CPU to acknowledge the update and queue work to + * process it. The remaining CPUs can resume their work. + */ + if (!raw_spin_trylock(&hfi_instance->event_lock)) + return; + + /* Skip duplicated updates. */ + new_timestamp = *(u64 *)hfi_instance->hw_table; + if (*hfi_instance->timestamp == new_timestamp) { + raw_spin_unlock(&hfi_instance->event_lock); + return; + } + + raw_spin_lock(&hfi_instance->table_lock); + + /* + * Copy the updated table into our local copy. This includes the new + * timestamp. 
+ */ + memcpy(hfi_instance->local_table, hfi_instance->hw_table, + hfi_features.nr_table_pages << PAGE_SHIFT); + + raw_spin_unlock(&hfi_instance->table_lock); + raw_spin_unlock(&hfi_instance->event_lock); + + /* + * Let hardware know that we are done reading the HFI table and it is + * free to update it again. + */ + pkg_therm_status_msr_val &= THERM_STATUS_CLEAR_PKG_MASK & + ~PACKAGE_THERM_STATUS_HFI_UPDATED; + wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS, pkg_therm_status_msr_val); + + queue_delayed_work(hfi_updates_wq, &hfi_instance->update_work, + HFI_UPDATE_INTERVAL); +} + static void init_hfi_cpu_index(struct hfi_cpu_info *info) { union cpuid6_edx edx; @@ -258,8 +352,20 @@ void intel_hfi_online(unsigned int cpu) init_hfi_instance(hfi_instance); + INIT_DELAYED_WORK(&hfi_instance->update_work, hfi_update_work_fn); + raw_spin_lock_init(&hfi_instance->table_lock); + raw_spin_lock_init(&hfi_instance->event_lock); + cpumask_set_cpu(cpu, hfi_instance->cpus); + /* + * Enable the hardware feedback interface and never disable it. See + * comment on programming the address of the table. 
+ */ + rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); + msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT; + wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); + unlock: mutex_unlock(&hfi_instance_lock); return; @@ -373,6 +479,10 @@ void __init intel_hfi_init(void) goto err_nomem; } + hfi_updates_wq = create_singlethread_workqueue("hfi-updates"); + if (!hfi_updates_wq) + goto err_nomem; + return; err_nomem: diff --git a/drivers/thermal/intel/intel_hfi.h b/drivers/thermal/intel/intel_hfi.h index 56c6b2d752027..325aa78b745cf 100644 --- a/drivers/thermal/intel/intel_hfi.h +++ b/drivers/thermal/intel/intel_hfi.h @@ -6,10 +6,12 @@ void __init intel_hfi_init(void); void intel_hfi_online(unsigned int cpu); void intel_hfi_offline(unsigned int cpu); +void intel_hfi_process_event(__u64 pkg_therm_status_msr_val); #else static inline void intel_hfi_init(void) { } static inline void intel_hfi_online(unsigned int cpu) { } static inline void intel_hfi_offline(unsigned int cpu) { } +static inline void intel_hfi_process_event(__u64 pkg_therm_status_msr_val) { } #endif /* CONFIG_INTEL_HFI_THERMAL */ #endif /* _INTEL_HFI_H */ diff --git a/drivers/thermal/intel/therm_throt.c b/drivers/thermal/intel/therm_throt.c index 8571e57a013a3..8352083b87c7c 100644 --- a/drivers/thermal/intel/therm_throt.c +++ b/drivers/thermal/intel/therm_throt.c @@ -620,6 +620,10 @@ void intel_thermal_interrupt(void) PACKAGE_THERM_STATUS_POWER_LIMIT, POWER_LIMIT_EVENT, PACKAGE_LEVEL); + + if (this_cpu_has(X86_FEATURE_HFI)) + intel_hfi_process_event(msr_val & + PACKAGE_THERM_STATUS_HFI_UPDATED); } } @@ -729,6 +733,12 @@ void intel_init_thermal(struct cpuinfo_x86 *c) wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l | (PACKAGE_THERM_INT_LOW_ENABLE | PACKAGE_THERM_INT_HIGH_ENABLE), h); + + if (cpu_has(c, X86_FEATURE_HFI)) { + rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); + wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, + l | PACKAGE_THERM_INT_HFI_ENABLE, h); + } } rdmsr(MSR_IA32_MISC_ENABLE, l, h); -- GitLab From 
e4b1eb24ce5a696ef7229f9926ff34d7502f0582 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 27 Jan 2022 11:34:53 -0800 Subject: [PATCH 0217/1586] thermal: netlink: Add a new event to notify CPU capabilities change Add a new netlink event to notify change in CPU capabilities in terms of performance and efficiency. Firmware may change CPU capabilities as a result of thermal events in the system or to account for changes in the TDP (thermal design power) level. This notification type will allow user space to avoid running workloads on certain CPUs or proactively adjust power limits to avoid future events. The netlink message consists of a nested attribute (THERMAL_GENL_ATTR_CPU_CAPABILITY) with three attributes: * THERMAL_GENL_ATTR_CPU_CAPABILITY_ID (type u32): -- logical CPU number * THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE (type u32): -- Scaled performance from 0-1023 * THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY (type u32): -- Scaled efficiency from 0-1023 Reviewed-by: Len Brown Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. 
Wysocki --- drivers/thermal/thermal_netlink.c | 53 +++++++++++++++++++++++++++++++ drivers/thermal/thermal_netlink.h | 14 ++++++++ include/uapi/linux/thermal.h | 6 +++- 3 files changed, 72 insertions(+), 1 deletion(-) diff --git a/drivers/thermal/thermal_netlink.c b/drivers/thermal/thermal_netlink.c index a16dd4d5d710e..7c97a091680ef 100644 --- a/drivers/thermal/thermal_netlink.c +++ b/drivers/thermal/thermal_netlink.c @@ -43,6 +43,11 @@ static const struct nla_policy thermal_genl_policy[THERMAL_GENL_ATTR_MAX + 1] = [THERMAL_GENL_ATTR_CDEV_MAX_STATE] = { .type = NLA_U32 }, [THERMAL_GENL_ATTR_CDEV_NAME] = { .type = NLA_STRING, .len = THERMAL_NAME_LENGTH }, + /* CPU capabilities */ + [THERMAL_GENL_ATTR_CPU_CAPABILITY] = { .type = NLA_NESTED }, + [THERMAL_GENL_ATTR_CPU_CAPABILITY_ID] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY] = { .type = NLA_U32 }, }; struct param { @@ -58,6 +63,8 @@ struct param { int temp; int cdev_state; int cdev_max_state; + struct thermal_genl_cpu_caps *cpu_capabilities; + int cpu_capabilities_count; }; typedef int (*cb_t)(struct param *); @@ -190,6 +197,42 @@ static int thermal_genl_event_gov_change(struct param *p) return 0; } +static int thermal_genl_event_cpu_capability_change(struct param *p) +{ + struct thermal_genl_cpu_caps *cpu_cap = p->cpu_capabilities; + struct sk_buff *msg = p->msg; + struct nlattr *start_cap; + int i; + + start_cap = nla_nest_start(msg, THERMAL_GENL_ATTR_CPU_CAPABILITY); + if (!start_cap) + return -EMSGSIZE; + + for (i = 0; i < p->cpu_capabilities_count; ++i) { + if (nla_put_u32(msg, THERMAL_GENL_ATTR_CPU_CAPABILITY_ID, + cpu_cap->cpu)) + goto out_cancel_nest; + + if (nla_put_u32(msg, THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE, + cpu_cap->performance)) + goto out_cancel_nest; + + if (nla_put_u32(msg, THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY, + cpu_cap->efficiency)) + goto out_cancel_nest; + + ++cpu_cap; + } + + 
nla_nest_end(msg, start_cap); + + return 0; +out_cancel_nest: + nla_nest_cancel(msg, start_cap); + + return -EMSGSIZE; +} + int thermal_genl_event_tz_delete(struct param *p) __attribute__((alias("thermal_genl_event_tz"))); @@ -219,6 +262,7 @@ static cb_t event_cb[] = { [THERMAL_GENL_EVENT_CDEV_DELETE] = thermal_genl_event_cdev_delete, [THERMAL_GENL_EVENT_CDEV_STATE_UPDATE] = thermal_genl_event_cdev_state_update, [THERMAL_GENL_EVENT_TZ_GOV_CHANGE] = thermal_genl_event_gov_change, + [THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE] = thermal_genl_event_cpu_capability_change, }; /* @@ -356,6 +400,15 @@ int thermal_notify_tz_gov_change(int tz_id, const char *name) return thermal_genl_send_event(THERMAL_GENL_EVENT_TZ_GOV_CHANGE, &p); } +int thermal_genl_cpu_capability_event(int count, + struct thermal_genl_cpu_caps *caps) +{ + struct param p = { .cpu_capabilities_count = count, .cpu_capabilities = caps }; + + return thermal_genl_send_event(THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE, &p); +} +EXPORT_SYMBOL_GPL(thermal_genl_cpu_capability_event); + /*************************** Command encoding ********************************/ static int __thermal_genl_cmd_tz_get_id(struct thermal_zone_device *tz, diff --git a/drivers/thermal/thermal_netlink.h b/drivers/thermal/thermal_netlink.h index e554f76291f41..04d1adbbc0124 100644 --- a/drivers/thermal/thermal_netlink.h +++ b/drivers/thermal/thermal_netlink.h @@ -4,6 +4,12 @@ * Author: Daniel Lezcano */ +struct thermal_genl_cpu_caps { + int cpu; + int performance; + int efficiency; +}; + /* Netlink notification function */ #ifdef CONFIG_THERMAL_NETLINK int __init thermal_netlink_init(void); @@ -23,6 +29,8 @@ int thermal_notify_cdev_add(int cdev_id, const char *name, int max_state); int thermal_notify_cdev_delete(int cdev_id); int thermal_notify_tz_gov_change(int tz_id, const char *name); int thermal_genl_sampling_temp(int id, int temp); +int thermal_genl_cpu_capability_event(int count, + struct thermal_genl_cpu_caps *caps); #else static 
inline int thermal_netlink_init(void) { @@ -101,4 +109,10 @@ static inline int thermal_genl_sampling_temp(int id, int temp) { return 0; } + +static inline int thermal_genl_cpu_capability_event(int count, struct thermal_genl_cpu_caps *caps) +{ + return 0; +} + #endif /* CONFIG_THERMAL_NETLINK */ diff --git a/include/uapi/linux/thermal.h b/include/uapi/linux/thermal.h index 9aa2fedfa309b..fc78bf3aead79 100644 --- a/include/uapi/linux/thermal.h +++ b/include/uapi/linux/thermal.h @@ -44,7 +44,10 @@ enum thermal_genl_attr { THERMAL_GENL_ATTR_CDEV_MAX_STATE, THERMAL_GENL_ATTR_CDEV_NAME, THERMAL_GENL_ATTR_GOV_NAME, - + THERMAL_GENL_ATTR_CPU_CAPABILITY, + THERMAL_GENL_ATTR_CPU_CAPABILITY_ID, + THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE, + THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY, __THERMAL_GENL_ATTR_MAX, }; #define THERMAL_GENL_ATTR_MAX (__THERMAL_GENL_ATTR_MAX - 1) @@ -71,6 +74,7 @@ enum thermal_genl_event { THERMAL_GENL_EVENT_CDEV_DELETE, /* Cdev unbound */ THERMAL_GENL_EVENT_CDEV_STATE_UPDATE, /* Cdev state updated */ THERMAL_GENL_EVENT_TZ_GOV_CHANGE, /* Governor policy changed */ + THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE, /* CPU capability changed */ __THERMAL_GENL_EVENT_MAX, }; #define THERMAL_GENL_EVENT_MAX (__THERMAL_GENL_EVENT_MAX - 1) -- GitLab From bd30cdfd9bd73b68e4977ce7c5540aa7b14c25cd Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 27 Jan 2022 11:34:54 -0800 Subject: [PATCH 0218/1586] thermal: intel: hfi: Notify user space for HFI events When the hardware issues an HFI event, relay a notification to user space. This allows user space to respond by reading performance and efficiency of each CPU and take appropriate action. For example, when the performance and efficiency of a CPU is 0, user space can either offline the CPU or inject idle. Also, if user space notices a downward trend in performance, it may proactively adjust power limits to avoid future situations in which performance drops to 0.
To avoid excessive notifications, the rate is limited by one HZ per event. To limit the netlink message size, send parameters for up to 16 CPUs in a single message. If there are more than 16 CPUs, issue as many messages as needed to notify the status of all CPUs. In the HFI specification, both performance and efficiency capabilities are defined in the [0, 255] range. The existing implementations of HFI hardware do not scale the maximum values to 255. Since userspace cares about capability values that are either 0 or show a downward/upward trend, this fact does not matter much. Relative changes in capabilities are enough. To comply with the thermal netlink ABI, scale both performance and efficiency capabilities to the [0, 1023] interval. Reviewed-by: Len Brown Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/thermal/intel/Kconfig | 1 + drivers/thermal/intel/intel_hfi.c | 75 ++++++++++++++++++++++++++++++- 2 files changed, 75 insertions(+), 1 deletion(-) diff --git a/drivers/thermal/intel/Kconfig b/drivers/thermal/intel/Kconfig index e9d2925227d4f..6cf3fe36a4ae8 100644 --- a/drivers/thermal/intel/Kconfig +++ b/drivers/thermal/intel/Kconfig @@ -104,6 +104,7 @@ config INTEL_HFI_THERMAL bool "Intel Hardware Feedback Interface" depends on CPU_SUP_INTEL depends on X86_THERMAL_VECTOR + select THERMAL_NETLINK help Select this option to enable the Hardware Feedback Interface. 
If selected, hardware provides guidance to the operating system on diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c index 7a2b750eecc45..730fd121df6ec 100644 --- a/drivers/thermal/intel/intel_hfi.c +++ b/drivers/thermal/intel/intel_hfi.c @@ -40,6 +40,7 @@ #include +#include "../thermal_core.h" #include "intel_hfi.h" #define THERM_STATUS_CLEAR_PKG_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | \ @@ -163,6 +164,78 @@ static DEFINE_MUTEX(hfi_instance_lock); static struct workqueue_struct *hfi_updates_wq; #define HFI_UPDATE_INTERVAL HZ +#define HFI_MAX_THERM_NOTIFY_COUNT 16 + +static void get_hfi_caps(struct hfi_instance *hfi_instance, + struct thermal_genl_cpu_caps *cpu_caps) +{ + int cpu, i = 0; + + raw_spin_lock_irq(&hfi_instance->table_lock); + for_each_cpu(cpu, hfi_instance->cpus) { + struct hfi_cpu_data *caps; + s16 index; + + index = per_cpu(hfi_cpu_info, cpu).index; + caps = hfi_instance->data + index * hfi_features.cpu_stride; + cpu_caps[i].cpu = cpu; + + /* + * Scale performance and energy efficiency to + * the [0, 1023] interval that thermal netlink uses. + */ + cpu_caps[i].performance = caps->perf_cap << 2; + cpu_caps[i].efficiency = caps->ee_cap << 2; + + ++i; + } + raw_spin_unlock_irq(&hfi_instance->table_lock); +} + +/* + * Call update_capabilities() when there are changes in the HFI table. + */ +static void update_capabilities(struct hfi_instance *hfi_instance) +{ + struct thermal_genl_cpu_caps *cpu_caps; + int i = 0, cpu_count; + + /* CPUs may come online/offline while processing an HFI update. */ + mutex_lock(&hfi_instance_lock); + + cpu_count = cpumask_weight(hfi_instance->cpus); + + /* No CPUs to report in this hfi_instance. 
*/ + if (!cpu_count) + goto out; + + cpu_caps = kcalloc(cpu_count, sizeof(*cpu_caps), GFP_KERNEL); + if (!cpu_caps) + goto out; + + get_hfi_caps(hfi_instance, cpu_caps); + + if (cpu_count < HFI_MAX_THERM_NOTIFY_COUNT) + goto last_cmd; + + /* Process complete chunks of HFI_MAX_THERM_NOTIFY_COUNT capabilities. */ + for (i = 0; + (i + HFI_MAX_THERM_NOTIFY_COUNT) <= cpu_count; + i += HFI_MAX_THERM_NOTIFY_COUNT) + thermal_genl_cpu_capability_event(HFI_MAX_THERM_NOTIFY_COUNT, + &cpu_caps[i]); + + cpu_count = cpu_count - i; + +last_cmd: + /* Process the remaining capabilities if any. */ + if (cpu_count) + thermal_genl_cpu_capability_event(cpu_count, &cpu_caps[i]); + + kfree(cpu_caps); +out: + mutex_unlock(&hfi_instance_lock); +} static void hfi_update_work_fn(struct work_struct *work) { @@ -173,7 +246,7 @@ static void hfi_update_work_fn(struct work_struct *work) if (!hfi_instance) return; - /* TODO: Consume update here. */ + update_capabilities(hfi_instance); } void intel_hfi_process_event(__u64 pkg_therm_status_msr_val) -- GitLab From 2651bf680bc2ad9a078b7222b0873145ab4ece07 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 3 Feb 2022 11:28:25 -0800 Subject: [PATCH 0219/1586] block: introduce BLK_STS_OFFLINE Currently, drivers report BLK_STS_IOERR for devices that are not fully online or are being removed. This behavior could cause confusion for users, as these are not really I/O errors from the device. Solve this issue with a new state BLK_STS_OFFLINE, which reports "device offline error" in dmesg instead of "I/O error". EIO is intentionally kept so as not to change the user-visible return value. Signed-off-by: Song Liu Reviewed-by: Martin K.
Petersen Link: https://lore.kernel.org/r/20220203192827.1370270-2-song@kernel.org Signed-off-by: Jens Axboe --- block/blk-core.c | 1 + include/linux/blk_types.h | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/block/blk-core.c b/block/blk-core.c index 61f6a0dc4511a..24035dd2eef17 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -164,6 +164,7 @@ static const struct { [BLK_STS_RESOURCE] = { -ENOMEM, "kernel resource" }, [BLK_STS_DEV_RESOURCE] = { -EBUSY, "device resource" }, [BLK_STS_AGAIN] = { -EAGAIN, "nonblocking retry" }, + [BLK_STS_OFFLINE] = { -EIO, "device offline" }, /* device mapper special case, should not leak out: */ [BLK_STS_DM_REQUEUE] = { -EREMCHG, "dm internal retry" }, diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index fe065c394fff6..5561e58d158ac 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -153,6 +153,13 @@ typedef u8 __bitwise blk_status_t; */ #define BLK_STS_ZONE_ACTIVE_RESOURCE ((__force blk_status_t)16) +/* + * BLK_STS_OFFLINE is returned from the driver when the target device is offline + * or is being taken offline. This could help differentiate the case where a + * device is intentionally being shut down from a real I/O error. + */ +#define BLK_STS_OFFLINE ((__force blk_status_t)17) + /** * blk_path_error - returns true if error may be path related * @error: status the request was completed with -- GitLab From 7d32c027a21ef7aa0a400763397644d44b3576a9 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 3 Feb 2022 11:28:26 -0800 Subject: [PATCH 0220/1586] block: return -ENODEV for BLK_STS_OFFLINE Change the user visible return value for BLK_STS_OFFLINE to -ENODEV, which is more descriptive than existing -EIO. Signed-off-by: Song Liu Reviewed-by: Martin K. 
Petersen Link: https://lore.kernel.org/r/20220203192827.1370270-3-song@kernel.org Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-core.c b/block/blk-core.c index 24035dd2eef17..be8812f5489d4 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -164,7 +164,7 @@ static const struct { [BLK_STS_RESOURCE] = { -ENOMEM, "kernel resource" }, [BLK_STS_DEV_RESOURCE] = { -EBUSY, "device resource" }, [BLK_STS_AGAIN] = { -EAGAIN, "nonblocking retry" }, - [BLK_STS_OFFLINE] = { -EIO, "device offline" }, + [BLK_STS_OFFLINE] = { -ENODEV, "device offline" }, /* device mapper special case, should not leak out: */ [BLK_STS_DM_REQUEUE] = { -EREMCHG, "dm internal retry" }, -- GitLab From 9574d43479e16352e75bc875c9952ed8e129c9b2 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 3 Feb 2022 11:28:27 -0800 Subject: [PATCH 0221/1586] scsi: use BLK_STS_OFFLINE for not fully online devices The new error message for such case looks like [ 172.809565] device offline error, dev sda, sector 3138208 ... which will not be confused with regular I/O error (BLK_STS_IOERR). Reviewed-by: Hannes Reinecke Signed-off-by: Song Liu Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20220203192827.1370270-4-song@kernel.org Signed-off-by: Jens Axboe --- drivers/scsi/scsi_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 0a70aa763a961..e30bc51578e93 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1276,7 +1276,7 @@ scsi_device_state_check(struct scsi_device *sdev, struct request *req) * power management commands. 
*/ if (req && !(req->rq_flags & RQF_PM)) - return BLK_STS_IOERR; + return BLK_STS_OFFLINE; return BLK_STS_OK; } } -- GitLab From 6d18762ed5cd549fde74fd0e05d4d87bac5a3beb Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 4 Feb 2022 11:21:14 +0800 Subject: [PATCH 0222/1586] f2fs: fix to unlock page correctly in error path of is_alive() As Pavel Machek reported in the link below [1]: After commit 77900c45ee5c ("f2fs: fix to do sanity check in is_alive()"), the node page should be unlocked by calling f2fs_put_page() in the error path of is_alive(); otherwise, f2fs may hang when it tries to lock the node page. Fix it. [1] https://lore.kernel.org/stable/20220124203637.GA19321@duo.ucw.cz/ Fixes: 77900c45ee5c ("f2fs: fix to do sanity check in is_alive()") Cc: Reported-by: Pavel Machek Signed-off-by: Pavel Machek Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 0a6b0a8ae97ee..2d53ef121e76e 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1038,8 +1038,10 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, set_sbi_flag(sbi, SBI_NEED_FSCK); } - if (f2fs_check_nid_range(sbi, dni->ino)) + if (f2fs_check_nid_range(sbi, dni->ino)) { + f2fs_put_page(node_page, 1); return false; + } *nofs = ofs_of_node(node_page); source_blkaddr = data_blkaddr(NULL, node_page, ofs_in_node); -- GitLab From 430f163b01888dc26696365d9c1053ba9d6c7d92 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 4 Feb 2022 08:34:10 +0800 Subject: [PATCH 0223/1586] f2fs: adjust readahead block number during recovery In a fragmented image, entries in the dnode block list may be located in a non-contiguous physical block address space. However, in the recovery flow we always read ahead BIO_MAX_VECS blocks, so in such a case the current readahead policy is inefficient. Adjust the readahead window size dynamically based on the consecutiveness of dnode blocks.
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 8 ++++++-- fs/f2fs/f2fs.h | 6 +++++- fs/f2fs/recovery.c | 27 ++++++++++++++++++++++++--- 3 files changed, 35 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index deeda95688f03..a13b6b4af220a 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -282,18 +282,22 @@ out: return blkno - start; } -void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index) +void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index, + unsigned int ra_blocks) { struct page *page; bool readahead = false; + if (ra_blocks == RECOVERY_MIN_RA_BLOCKS) + return; + page = find_get_page(META_MAPPING(sbi), index); if (!page || !PageUptodate(page)) readahead = true; f2fs_put_page(page, 0); if (readahead) - f2fs_ra_meta_pages(sbi, index, BIO_MAX_VECS, META_POR, true); + f2fs_ra_meta_pages(sbi, index, ra_blocks, META_POR, true); } static int __f2fs_write_meta_page(struct page *page, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 63c90416364bc..51c1392708e6e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -590,6 +590,9 @@ enum { /* number of extent info in extent cache we try to shrink */ #define EXTENT_CACHE_SHRINK_NUMBER 128 +#define RECOVERY_MAX_RA_BLOCKS BIO_MAX_VECS +#define RECOVERY_MIN_RA_BLOCKS 1 + struct rb_entry { struct rb_node rb_node; /* rb node located in rb-tree */ union { @@ -3651,7 +3654,8 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type); int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type, bool sync); -void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index); +void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index, + unsigned int ra_blocks); long f2fs_sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, long nr_to_write, enum iostat_type io_type); void f2fs_add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type); diff --git 
a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 10d152cfa58d1..2af503f75b4fa 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -343,6 +343,19 @@ static int recover_inode(struct inode *inode, struct page *page) return 0; } +static unsigned int adjust_por_ra_blocks(struct f2fs_sb_info *sbi, + unsigned int ra_blocks, unsigned int blkaddr, + unsigned int next_blkaddr) +{ + if (blkaddr + 1 == next_blkaddr) + ra_blocks = min_t(unsigned int, RECOVERY_MAX_RA_BLOCKS, + ra_blocks * 2); + else if (next_blkaddr % sbi->blocks_per_seg) + ra_blocks = max_t(unsigned int, RECOVERY_MIN_RA_BLOCKS, + ra_blocks / 2); + return ra_blocks; +} + static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, bool check_only) { @@ -350,6 +363,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, struct page *page = NULL; block_t blkaddr; unsigned int loop_cnt = 0; + unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS; unsigned int free_blocks = MAIN_SEGS(sbi) * sbi->blocks_per_seg - valid_user_blocks(sbi); int err = 0; @@ -424,11 +438,14 @@ next: break; } + ra_blocks = adjust_por_ra_blocks(sbi, ra_blocks, blkaddr, + next_blkaddr_of_node(page)); + /* check next segment */ blkaddr = next_blkaddr_of_node(page); f2fs_put_page(page, 1); - f2fs_ra_meta_pages_cond(sbi, blkaddr); + f2fs_ra_meta_pages_cond(sbi, blkaddr, ra_blocks); } return err; } @@ -704,6 +721,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, struct page *page = NULL; int err = 0; block_t blkaddr; + unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS; /* get node pages in the current segment */ curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); @@ -715,8 +733,6 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR)) break; - f2fs_ra_meta_pages_cond(sbi, blkaddr); - page = f2fs_get_tmp_page(sbi, blkaddr); if (IS_ERR(page)) { err = PTR_ERR(page); @@ -759,9 +775,14 @@ static int 
recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, if (entry->blkaddr == blkaddr) list_move_tail(&entry->list, tmp_inode_list); next: + ra_blocks = adjust_por_ra_blocks(sbi, ra_blocks, blkaddr, + next_blkaddr_of_node(page)); + /* check next segment */ blkaddr = next_blkaddr_of_node(page); f2fs_put_page(page, 1); + + f2fs_ra_meta_pages_cond(sbi, blkaddr, ra_blocks); } if (!err) f2fs_allocate_new_segments(sbi); -- GitLab From 78e3437450be5236c4949e377c9b848bbcd4fcb0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 4 Feb 2022 08:19:34 +0100 Subject: [PATCH 0224/1586] block: call bio_associate_blkg from bio_reset Call bio_associate_blkg just like bio_set_dev did in the callers before the conversion to set the block device in bio_reset. Fixes: a7c50c940477 ("block: pass a block_device and opf to bio_reset") Reported-by: syzbot+2b3f18414c37b42dcc94@syzkaller.appspotmail.com Tested-by: syzbot+2b3f18414c37b42dcc94@syzkaller.appspotmail.com Signed-off-by: Christoph Hellwig Tested-by: Chaitanya Kulkarni Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220204071934.168469-1-hch@lst.de Signed-off-by: Jens Axboe --- block/bio.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/bio.c b/block/bio.c index 2e19ca600fcdb..d2f3c10350364 100644 --- a/block/bio.c +++ b/block/bio.c @@ -310,6 +310,8 @@ void bio_reset(struct bio *bio, struct block_device *bdev, unsigned int opf) memset(bio, 0, BIO_RESET_BYTES); atomic_set(&bio->__bi_remaining, 1); bio->bi_bdev = bdev; + if (bio->bi_bdev) + bio_associate_blkg(bio); bio->bi_opf = opf; } EXPORT_SYMBOL(bio_reset); -- GitLab From c347a787e34cba0e5a80a04082dacaf259105605 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 2 Feb 2022 17:00:57 +0100 Subject: [PATCH 0225/1586] drbd: set ->bi_bdev in drbd_req_new Make sure the newly allocated bio has the correct bi_bdev set from the start. 
Signed-off-by: Christoph Hellwig Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20220202160109.108149-2-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_req.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 3235532ae0778..8d44e96c4c4ef 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -31,6 +31,7 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device, struct bio memset(req, 0, sizeof(*req)); req->private_bio = bio_clone_fast(bio_src, GFP_NOIO, &drbd_io_bio_set); + bio_set_dev(req->private_bio, device->ldev->backing_bdev); req->private_bio->bi_private = req; req->private_bio->bi_end_io = drbd_request_endio; @@ -1151,8 +1152,6 @@ drbd_submit_req_private_bio(struct drbd_request *req) else type = DRBD_FAULT_DT_RD; - bio_set_dev(bio, device->ldev->backing_bdev); - /* State may have changed since we grabbed our reference on the * ->ldev member. Double check, and short-circuit to endio. * In case the last activity log transaction failed to get on -- GitLab From 6c23f0bd7f16d88c774db37b30c5da82811c41be Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 2 Feb 2022 17:00:58 +0100 Subject: [PATCH 0226/1586] dm: add a clone_to_tio helper Add a helper to stop open coding the container_of operations to get from the clone bio to the tio structure. 
Signed-off-by: Christoph Hellwig Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20220202160109.108149-3-hch@lst.de Signed-off-by: Jens Axboe --- drivers/md/dm.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index fa596b654c99c..5543e18f3c3bc 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -79,10 +79,14 @@ struct clone_info { #define DM_IO_BIO_OFFSET \ (offsetof(struct dm_target_io, clone) + offsetof(struct dm_io, tio)) +static inline struct dm_target_io *clone_to_tio(struct bio *clone) +{ + return container_of(clone, struct dm_target_io, clone); +} + void *dm_per_bio_data(struct bio *bio, size_t data_size) { - struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone); - if (!tio->inside_dm_io) + if (!clone_to_tio(bio)->inside_dm_io) return (char *)bio - DM_TARGET_IO_BIO_OFFSET - data_size; return (char *)bio - DM_IO_BIO_OFFSET - data_size; } @@ -477,10 +481,7 @@ out: u64 dm_start_time_ns_from_clone(struct bio *bio) { - struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone); - struct dm_io *io = tio->io; - - return jiffies_to_nsecs(io->start_time); + return jiffies_to_nsecs(clone_to_tio(bio)->io->start_time); } EXPORT_SYMBOL_GPL(dm_start_time_ns_from_clone); @@ -521,7 +522,7 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) clone = bio_alloc_bioset(NULL, 0, 0, GFP_NOIO, &md->io_bs); - tio = container_of(clone, struct dm_target_io, clone); + tio = clone_to_tio(clone); tio->inside_dm_io = true; tio->io = NULL; @@ -557,7 +558,7 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci, struct dm_target *t if (!clone) return NULL; - tio = container_of(clone, struct dm_target_io, clone); + tio = clone_to_tio(clone); tio->inside_dm_io = false; } @@ -878,7 +879,7 @@ static bool swap_bios_limit(struct dm_target *ti, struct bio *bio) static void clone_endio(struct bio *bio) { blk_status_t error = 
bio->bi_status; - struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone); + struct dm_target_io *tio = clone_to_tio(bio); struct dm_io *io = tio->io; struct mapped_device *md = tio->io->md; dm_endio_fn endio = tio->ti->type->end_io; @@ -1084,7 +1085,7 @@ static int dm_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, */ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors) { - struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone); + struct dm_target_io *tio = clone_to_tio(bio); unsigned bi_size = bio->bi_iter.bi_size >> SECTOR_SHIFT; BUG_ON(bio->bi_opf & REQ_PREFLUSH); @@ -1257,10 +1258,8 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci, if (bio_nr == num_bios) return; - while ((bio = bio_list_pop(blist))) { - tio = container_of(bio, struct dm_target_io, clone); - free_tio(tio); - } + while ((bio = bio_list_pop(blist))) + free_tio(clone_to_tio(bio)); } } @@ -1282,14 +1281,11 @@ static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, { struct bio_list blist = BIO_EMPTY_LIST; struct bio *bio; - struct dm_target_io *tio; alloc_multiple_bios(&blist, ci, ti, num_bios); - while ((bio = bio_list_pop(&blist))) { - tio = container_of(bio, struct dm_target_io, clone); - __clone_and_map_simple_bio(ci, tio, len); - } + while ((bio = bio_list_pop(&blist))) + __clone_and_map_simple_bio(ci, clone_to_tio(bio), len); } static int __send_empty_flush(struct clone_info *ci) -- GitLab From b1bee79237ce0ab43ef7fe66aa6e5c4783165012 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 2 Feb 2022 17:00:59 +0100 Subject: [PATCH 0227/1586] dm: fold clone_bio into __clone_and_map_data_bio Fold clone_bio into its only caller to prepare for refactoring. 
Signed-off-by: Christoph Hellwig Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20220202160109.108149-4-hch@lst.de Signed-off-by: Jens Axboe --- drivers/md/dm.c | 43 +++++++++++++++++-------------------------- 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 5543e18f3c3bc..9384d250a3e4e 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1190,17 +1190,22 @@ static void bio_setup_sector(struct bio *bio, sector_t sector, unsigned len) /* * Creates a bio that consists of range of complete bvecs. */ -static int clone_bio(struct dm_target_io *tio, struct bio *bio, - sector_t sector, unsigned len) +static int __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti, + sector_t sector, unsigned *len) { - struct bio *clone = &tio->clone; + struct bio *bio = ci->bio, *clone; + struct dm_target_io *tio; int r; + tio = alloc_tio(ci, ti, 0, GFP_NOIO); + tio->len_ptr = len; + + clone = &tio->clone; __bio_clone_fast(clone, bio); r = bio_crypt_clone(clone, bio, GFP_NOIO); if (r < 0) - return r; + goto free_tio; if (bio_integrity(bio)) { if (unlikely(!dm_target_has_integrity(tio->ti->type) && @@ -1208,21 +1213,26 @@ static int clone_bio(struct dm_target_io *tio, struct bio *bio, DMWARN("%s: the target %s doesn't support integrity data.", dm_device_name(tio->io->md), tio->ti->type->name); - return -EIO; + r = -EIO; + goto free_tio; } r = bio_integrity_clone(clone, bio, GFP_NOIO); if (r < 0) - return r; + goto free_tio; } bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector)); - clone->bi_iter.bi_size = to_bytes(len); + clone->bi_iter.bi_size = to_bytes(*len); if (bio_integrity(bio)) bio_integrity_trim(clone); + __map_bio(tio); return 0; +free_tio: + free_tio(tio); + return r; } static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci, @@ -1313,25 +1323,6 @@ static int __send_empty_flush(struct clone_info *ci) return 0; } -static int __clone_and_map_data_bio(struct 
clone_info *ci, struct dm_target *ti, - sector_t sector, unsigned *len) -{ - struct bio *bio = ci->bio; - struct dm_target_io *tio; - int r; - - tio = alloc_tio(ci, ti, 0, GFP_NOIO); - tio->len_ptr = len; - r = clone_bio(tio, bio, sector, *len); - if (r < 0) { - free_tio(tio); - return r; - } - __map_bio(tio); - - return 0; -} - static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti, unsigned num_bios) { -- GitLab From 8eabf5d0a7bd9226d6cc25402dde67f372aae838 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 2 Feb 2022 17:01:00 +0100 Subject: [PATCH 0228/1586] dm: fold __send_duplicate_bios into __clone_and_map_simple_bio Fold __send_duplicate_bios into its only caller to prepare for refactoring. Signed-off-by: Christoph Hellwig Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20220202160109.108149-5-hch@lst.de Signed-off-by: Jens Axboe --- drivers/md/dm.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 9384d250a3e4e..2527b287ead0f 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1273,29 +1273,24 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci, } } -static void __clone_and_map_simple_bio(struct clone_info *ci, - struct dm_target_io *tio, unsigned *len) -{ - struct bio *clone = &tio->clone; - - tio->len_ptr = len; - - __bio_clone_fast(clone, ci->bio); - if (len) - bio_setup_sector(clone, ci->sector, *len); - __map_bio(tio); -} - static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, unsigned num_bios, unsigned *len) { struct bio_list blist = BIO_EMPTY_LIST; - struct bio *bio; + struct bio *clone; alloc_multiple_bios(&blist, ci, ti, num_bios); - while ((bio = bio_list_pop(&blist))) - __clone_and_map_simple_bio(ci, clone_to_tio(bio), len); + while ((clone = bio_list_pop(&blist))) { + struct dm_target_io *tio = clone_to_tio(clone); + + tio->len_ptr = len; + + 
__bio_clone_fast(clone, ci->bio); + if (len) + bio_setup_sector(clone, ci->sector, *len); + __map_bio(tio); + } } static int __send_empty_flush(struct clone_info *ci) -- GitLab From dc8e2021da71f6b2d5971f98ee3e528cf30c409c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 2 Feb 2022 17:01:01 +0100 Subject: [PATCH 0229/1586] dm: move cloning the bio into alloc_tio Move the call to __bio_clone_fast and the assignment of ->len_ptr from the callers into alloc_tio to prepare for changes to the bio clone API. Signed-off-by: Christoph Hellwig Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20220202160109.108149-6-hch@lst.de Signed-off-by: Jens Axboe --- drivers/md/dm.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 2527b287ead0f..90341b7fa5809 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -545,7 +545,7 @@ static void free_io(struct mapped_device *md, struct dm_io *io) } static struct dm_target_io *alloc_tio(struct clone_info *ci, struct dm_target *ti, - unsigned target_bio_nr, gfp_t gfp_mask) + unsigned target_bio_nr, unsigned *len, gfp_t gfp_mask) { struct dm_target_io *tio; @@ -561,11 +561,13 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci, struct dm_target *t tio = clone_to_tio(clone); tio->inside_dm_io = false; } + __bio_clone_fast(&tio->clone, ci->bio); tio->magic = DM_TIO_MAGIC; tio->io = ci->io; tio->ti = ti; tio->target_bio_nr = target_bio_nr; + tio->len_ptr = len; return tio; } @@ -1197,11 +1199,8 @@ static int __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti, struct dm_target_io *tio; int r; - tio = alloc_tio(ci, ti, 0, GFP_NOIO); - tio->len_ptr = len; - + tio = alloc_tio(ci, ti, 0, len, GFP_NOIO); clone = &tio->clone; - __bio_clone_fast(clone, bio); r = bio_crypt_clone(clone, bio, GFP_NOIO); if (r < 0) @@ -1236,7 +1235,8 @@ free_tio: } static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci, - 
struct dm_target *ti, unsigned num_bios) + struct dm_target *ti, unsigned num_bios, + unsigned *len) { struct dm_target_io *tio; int try; @@ -1245,7 +1245,7 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci, return; if (num_bios == 1) { - tio = alloc_tio(ci, ti, 0, GFP_NOIO); + tio = alloc_tio(ci, ti, 0, len, GFP_NOIO); bio_list_add(blist, &tio->clone); return; } @@ -1257,7 +1257,8 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci, if (try) mutex_lock(&ci->io->md->table_devices_lock); for (bio_nr = 0; bio_nr < num_bios; bio_nr++) { - tio = alloc_tio(ci, ti, bio_nr, try ? GFP_NOIO : GFP_NOWAIT); + tio = alloc_tio(ci, ti, bio_nr, len, + try ? GFP_NOIO : GFP_NOWAIT); if (!tio) break; @@ -1279,14 +1280,11 @@ static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, struct bio_list blist = BIO_EMPTY_LIST; struct bio *clone; - alloc_multiple_bios(&blist, ci, ti, num_bios); + alloc_multiple_bios(&blist, ci, ti, num_bios, len); while ((clone = bio_list_pop(&blist))) { struct dm_target_io *tio = clone_to_tio(clone); - tio->len_ptr = len; - - __bio_clone_fast(clone, ci->bio); if (len) bio_setup_sector(clone, ci->sector, *len); __map_bio(tio); -- GitLab From 1561b396106d759fdf5f9a71b412e068f74d2cc9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 2 Feb 2022 17:01:02 +0100 Subject: [PATCH 0230/1586] dm: pass the bio instead of tio to __map_bio This simplifies the callers a bit. 
Signed-off-by: Christoph Hellwig Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20220202160109.108149-7-hch@lst.de Signed-off-by: Jens Axboe --- drivers/md/dm.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 90341b7fa5809..a43d280e9bc54 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1117,11 +1117,11 @@ static noinline void __set_swap_bios_limit(struct mapped_device *md, int latch) mutex_unlock(&md->swap_bios_lock); } -static void __map_bio(struct dm_target_io *tio) +static void __map_bio(struct bio *clone) { + struct dm_target_io *tio = clone_to_tio(clone); int r; sector_t sector; - struct bio *clone = &tio->clone; struct dm_io *io = tio->io; struct dm_target *ti = tio->ti; @@ -1227,7 +1227,7 @@ static int __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti, if (bio_integrity(bio)) bio_integrity_trim(clone); - __map_bio(tio); + __map_bio(clone); return 0; free_tio: free_tio(tio); @@ -1283,11 +1283,9 @@ static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, alloc_multiple_bios(&blist, ci, ti, num_bios, len); while ((clone = bio_list_pop(&blist))) { - struct dm_target_io *tio = clone_to_tio(clone); - if (len) bio_setup_sector(clone, ci->sector, *len); - __map_bio(tio); + __map_bio(clone); } } -- GitLab From 1d1068cecff70cb8e48c7cb0ba27cc3fd906eb31 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 2 Feb 2022 17:01:03 +0100 Subject: [PATCH 0231/1586] dm: return the clone bio from alloc_tio Return the clone bio embedded into the tio as that is what the callers actually want. Similar for the free side.
Signed-off-by: Christoph Hellwig Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20220202160109.108149-8-hch@lst.de Signed-off-by: Jens Axboe --- drivers/md/dm.c | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index a43d280e9bc54..c05b6ff1bb957 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -544,7 +544,7 @@ static void free_io(struct mapped_device *md, struct dm_io *io) bio_put(&io->tio.clone); } -static struct dm_target_io *alloc_tio(struct clone_info *ci, struct dm_target *ti, +static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti, unsigned target_bio_nr, unsigned *len, gfp_t gfp_mask) { struct dm_target_io *tio; @@ -569,14 +569,14 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci, struct dm_target *t tio->target_bio_nr = target_bio_nr; tio->len_ptr = len; - return tio; + return &tio->clone; } -static void free_tio(struct dm_target_io *tio) +static void free_tio(struct bio *clone) { - if (tio->inside_dm_io) + if (clone_to_tio(clone)->inside_dm_io) return; - bio_put(&tio->clone); + bio_put(clone); } /* @@ -932,7 +932,7 @@ static void clone_endio(struct bio *bio) up(&md->swap_bios_semaphore); } - free_tio(tio); + free_tio(bio); dm_io_dec_pending(io, error); } @@ -1166,7 +1166,7 @@ static void __map_bio(struct bio *clone) struct mapped_device *md = io->md; up(&md->swap_bios_semaphore); } - free_tio(tio); + free_tio(clone); dm_io_dec_pending(io, BLK_STS_IOERR); break; case DM_MAPIO_REQUEUE: @@ -1174,7 +1174,7 @@ static void __map_bio(struct bio *clone) struct mapped_device *md = io->md; up(&md->swap_bios_semaphore); } - free_tio(tio); + free_tio(clone); dm_io_dec_pending(io, BLK_STS_DM_REQUEUE); break; default: @@ -1196,17 +1196,17 @@ static int __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti, sector_t sector, unsigned *len) { struct bio *bio = ci->bio, *clone; - struct dm_target_io *tio; int r; - tio = 
alloc_tio(ci, ti, 0, len, GFP_NOIO); - clone = &tio->clone; + clone = alloc_tio(ci, ti, 0, len, GFP_NOIO); r = bio_crypt_clone(clone, bio, GFP_NOIO); if (r < 0) goto free_tio; if (bio_integrity(bio)) { + struct dm_target_io *tio = clone_to_tio(clone); + if (unlikely(!dm_target_has_integrity(tio->ti->type) && !dm_target_passes_integrity(tio->ti->type))) { DMWARN("%s: the target %s doesn't support integrity data.", @@ -1230,7 +1230,7 @@ static int __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti, __map_bio(clone); return 0; free_tio: - free_tio(tio); + free_tio(clone); return r; } @@ -1238,31 +1238,30 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci, struct dm_target *ti, unsigned num_bios, unsigned *len) { - struct dm_target_io *tio; + struct bio *bio; int try; if (!num_bios) return; if (num_bios == 1) { - tio = alloc_tio(ci, ti, 0, len, GFP_NOIO); - bio_list_add(blist, &tio->clone); + bio = alloc_tio(ci, ti, 0, len, GFP_NOIO); + bio_list_add(blist, bio); return; } for (try = 0; try < 2; try++) { int bio_nr; - struct bio *bio; if (try) mutex_lock(&ci->io->md->table_devices_lock); for (bio_nr = 0; bio_nr < num_bios; bio_nr++) { - tio = alloc_tio(ci, ti, bio_nr, len, + bio = alloc_tio(ci, ti, bio_nr, len, try ? GFP_NOIO : GFP_NOWAIT); - if (!tio) + if (!bio) break; - bio_list_add(blist, &tio->clone); + bio_list_add(blist, bio); } if (try) mutex_unlock(&ci->io->md->table_devices_lock); @@ -1270,7 +1269,7 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci, return; while ((bio = bio_list_pop(blist))) - free_tio(clone_to_tio(bio)); + free_tio(bio); } } -- GitLab From 891fced644a7529bfd4b1436b2341527ce8f68ad Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 2 Feb 2022 17:01:04 +0100 Subject: [PATCH 0232/1586] dm: simplify the single bio fast path in __send_duplicate_bios Most targets just need a single flush bio. 
Open code that case in __send_duplicate_bios without the need to add the bio to a list. Signed-off-by: Christoph Hellwig Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20220202160109.108149-9-hch@lst.de Signed-off-by: Jens Axboe --- drivers/md/dm.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index c05b6ff1bb957..78df75f57288b 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1241,15 +1241,6 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci, struct bio *bio; int try; - if (!num_bios) - return; - - if (num_bios == 1) { - bio = alloc_tio(ci, ti, 0, len, GFP_NOIO); - bio_list_add(blist, bio); - return; - } - for (try = 0; try < 2; try++) { int bio_nr; @@ -1279,12 +1270,23 @@ static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, struct bio_list blist = BIO_EMPTY_LIST; struct bio *clone; - alloc_multiple_bios(&blist, ci, ti, num_bios, len); - - while ((clone = bio_list_pop(&blist))) { + switch (num_bios) { + case 0: + break; + case 1: + clone = alloc_tio(ci, ti, 0, len, GFP_NOIO); if (len) bio_setup_sector(clone, ci->sector, *len); __map_bio(clone); + break; + default: + alloc_multiple_bios(&blist, ci, ti, num_bios, len); + while ((clone = bio_list_pop(&blist))) { + if (len) + bio_setup_sector(clone, ci->sector, *len); + __map_bio(clone); + } + break; } } -- GitLab From 3c4b455ef8acdacd0e5ecd33428d4f32f861637a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 2 Feb 2022 17:01:05 +0100 Subject: [PATCH 0233/1586] dm-cache: remove __remap_to_origin_clear_discard Fold __remap_to_origin_clear_discard into the two callers to prepare for bio cloning refactoring. 
Signed-off-by: Christoph Hellwig Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20220202160109.108149-10-hch@lst.de Signed-off-by: Jens Axboe --- drivers/md/dm-cache-target.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 447d030036d18..1c37fe028e531 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -744,21 +744,14 @@ static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio) spin_unlock_irq(&cache->lock); } -static void __remap_to_origin_clear_discard(struct cache *cache, struct bio *bio, - dm_oblock_t oblock, bool bio_has_pbd) -{ - if (bio_has_pbd) - check_if_tick_bio_needed(cache, bio); - remap_to_origin(cache, bio); - if (bio_data_dir(bio) == WRITE) - clear_discard(cache, oblock_to_dblock(cache, oblock)); -} - static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio, dm_oblock_t oblock) { // FIXME: check_if_tick_bio_needed() is called way too much through this interface - __remap_to_origin_clear_discard(cache, bio, oblock, true); + check_if_tick_bio_needed(cache, bio); + remap_to_origin(cache, bio); + if (bio_data_dir(bio) == WRITE) + clear_discard(cache, oblock_to_dblock(cache, oblock)); } static void remap_to_cache_dirty(struct cache *cache, struct bio *bio, @@ -831,11 +824,10 @@ static void remap_to_origin_and_cache(struct cache *cache, struct bio *bio, BUG_ON(!origin_bio); bio_chain(origin_bio, bio); - /* - * Passing false to __remap_to_origin_clear_discard() skips - * all code that might use per_bio_data (since clone doesn't have it) - */ - __remap_to_origin_clear_discard(cache, origin_bio, oblock, false); + + remap_to_origin(cache, origin_bio); + if (bio_data_dir(origin_bio) == WRITE) + clear_discard(cache, oblock_to_dblock(cache, oblock)); submit_bio(origin_bio); remap_to_cache(cache, bio, cblock); -- GitLab From 56b4b5abcdab6daf71c5536fca2772f178590e06 Mon Sep 17 
00:00:00 2001 From: Christoph Hellwig Date: Wed, 2 Feb 2022 17:01:06 +0100 Subject: [PATCH 0234/1586] block: clone crypto and integrity data in __bio_clone_fast __bio_clone_fast should also clone integrity and crypto data, as a clone without those is incomplete. Right now the only caller that can actually support crypto and integrity data (dm) does it manually for the one callchain that supports these, but we better do it properly in the core. Note that all callers except for the above mentioned one also don't need to handle failure at all, given that the integrity and crypto clones are based on mempool allocations that won't fail for sleeping allocations. Signed-off-by: Christoph Hellwig Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20220202160109.108149-11-hch@lst.de Signed-off-by: Jens Axboe --- block/bio-integrity.c | 1 - block/bio.c | 26 +++++++++++++------------- block/blk-crypto.c | 1 - drivers/md/bcache/request.c | 2 +- drivers/md/dm.c | 33 ++++++--------------------------- drivers/md/md-multipath.c | 2 +- include/linux/bio.h | 2 +- 7 files changed, 22 insertions(+), 45 deletions(-) diff --git a/block/bio-integrity.c b/block/bio-integrity.c index d251147154592..bd54532200650 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -420,7 +420,6 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src, return 0; } -EXPORT_SYMBOL(bio_integrity_clone); int bioset_integrity_create(struct bio_set *bs, int pool_size) { diff --git a/block/bio.c b/block/bio.c index d2f3c10350364..2a921875bb420 100644 --- a/block/bio.c +++ b/block/bio.c @@ -732,6 +732,7 @@ EXPORT_SYMBOL(bio_put); * __bio_clone_fast - clone a bio that shares the original bio's biovec * @bio: destination bio * @bio_src: bio to clone + * @gfp: allocation flags * * Clone a &bio. Caller will own the returned bio, but not * the actual data it points to. 
Reference count of returned @@ -739,7 +740,7 @@ EXPORT_SYMBOL(bio_put); * * Caller must ensure that @bio_src is not freed before @bio. */ -void __bio_clone_fast(struct bio *bio, struct bio *bio_src) +int __bio_clone_fast(struct bio *bio, struct bio *bio_src, gfp_t gfp) { WARN_ON_ONCE(bio->bi_pool && bio->bi_max_vecs); @@ -761,6 +762,13 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src) bio_clone_blkg_association(bio, bio_src); blkcg_bio_issue_init(bio); + + if (bio_crypt_clone(bio, bio_src, gfp) < 0) + return -ENOMEM; + if (bio_integrity(bio_src) && + bio_integrity_clone(bio, bio_src, gfp) < 0) + return -ENOMEM; + return 0; } EXPORT_SYMBOL(__bio_clone_fast); @@ -780,20 +788,12 @@ struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs) if (!b) return NULL; - __bio_clone_fast(b, bio); - - if (bio_crypt_clone(b, bio, gfp_mask) < 0) - goto err_put; - - if (bio_integrity(bio) && - bio_integrity_clone(b, bio, gfp_mask) < 0) - goto err_put; + if (__bio_clone_fast(b, bio, gfp_mask < 0)) { + bio_put(b); + return NULL; + } return b; - -err_put: - bio_put(b); - return NULL; } EXPORT_SYMBOL(bio_clone_fast); diff --git a/block/blk-crypto.c b/block/blk-crypto.c index ec9efeeeca918..773dae4c329ba 100644 --- a/block/blk-crypto.c +++ b/block/blk-crypto.c @@ -111,7 +111,6 @@ int __bio_crypt_clone(struct bio *dst, struct bio *src, gfp_t gfp_mask) *dst->bi_crypt_context = *src->bi_crypt_context; return 0; } -EXPORT_SYMBOL_GPL(__bio_crypt_clone); /* Increments @dun by @inc, treating @dun as a multi-limb integer. 
*/ void bio_crypt_dun_increment(u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE], diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 7ba59d08ed870..574b02b94f1a4 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -686,7 +686,7 @@ static void do_bio_hook(struct search *s, struct bio *bio = &s->bio.bio; bio_init(bio, NULL, NULL, 0, 0); - __bio_clone_fast(bio, orig_bio); + __bio_clone_fast(bio, orig_bio, GFP_NOIO); /* * bi_end_io can be set separately somewhere else, e.g. the * variants in, diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 78df75f57288b..0f8796159379e 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -561,7 +561,12 @@ static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti, tio = clone_to_tio(clone); tio->inside_dm_io = false; } - __bio_clone_fast(&tio->clone, ci->bio); + + if (__bio_clone_fast(&tio->clone, ci->bio, gfp_mask) < 0) { + if (ci->io->tio.io) + bio_put(&tio->clone); + return NULL; + } tio->magic = DM_TIO_MAGIC; tio->io = ci->io; @@ -1196,31 +1201,8 @@ static int __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti, sector_t sector, unsigned *len) { struct bio *bio = ci->bio, *clone; - int r; clone = alloc_tio(ci, ti, 0, len, GFP_NOIO); - - r = bio_crypt_clone(clone, bio, GFP_NOIO); - if (r < 0) - goto free_tio; - - if (bio_integrity(bio)) { - struct dm_target_io *tio = clone_to_tio(clone); - - if (unlikely(!dm_target_has_integrity(tio->ti->type) && - !dm_target_passes_integrity(tio->ti->type))) { - DMWARN("%s: the target %s doesn't support integrity data.", - dm_device_name(tio->io->md), - tio->ti->type->name); - r = -EIO; - goto free_tio; - } - - r = bio_integrity_clone(clone, bio, GFP_NOIO); - if (r < 0) - goto free_tio; - } - bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector)); clone->bi_iter.bi_size = to_bytes(*len); @@ -1229,9 +1211,6 @@ static int __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti, __map_bio(clone); return 0; 
-free_tio: - free_tio(clone); - return r; } static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci, diff --git a/drivers/md/md-multipath.c b/drivers/md/md-multipath.c index 5e15940634d85..010c759c741ad 100644 --- a/drivers/md/md-multipath.c +++ b/drivers/md/md-multipath.c @@ -122,7 +122,7 @@ static bool multipath_make_request(struct mddev *mddev, struct bio * bio) multipath = conf->multipaths + mp_bh->path; bio_init(&mp_bh->bio, NULL, NULL, 0, 0); - __bio_clone_fast(&mp_bh->bio, bio); + __bio_clone_fast(&mp_bh->bio, bio, GFP_NOIO); mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset; bio_set_dev(&mp_bh->bio, multipath->rdev->bdev); diff --git a/include/linux/bio.h b/include/linux/bio.h index 18cfe5bb41ea8..b814361c957b0 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -413,7 +413,7 @@ struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev, struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs); extern void bio_put(struct bio *); -extern void __bio_clone_fast(struct bio *, struct bio *); +int __bio_clone_fast(struct bio *bio, struct bio *bio_src, gfp_t gfp); extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *); extern struct bio_set fs_bio_set; -- GitLab From 92986f6b4c8a2c24d3a36b80140624f80fd93de4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 2 Feb 2022 17:01:07 +0100 Subject: [PATCH 0235/1586] dm: use bio_clone_fast in alloc_io/alloc_tio Replace open coded bio_clone_fast implementations with the actual helper. Note that the bio allocated as part of the dm_io structure in alloc_io will only actually be used later in alloc_tio, making this earlier cloning of the information safe. 
Signed-off-by: Christoph Hellwig Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20220202160109.108149-12-hch@lst.de Signed-off-by: Jens Axboe --- drivers/md/dm.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 0f8796159379e..862564a5df74b 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -520,7 +520,7 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) struct dm_target_io *tio; struct bio *clone; - clone = bio_alloc_bioset(NULL, 0, 0, GFP_NOIO, &md->io_bs); + clone = bio_clone_fast(bio, GFP_NOIO, &md->io_bs); tio = clone_to_tio(clone); tio->inside_dm_io = true; @@ -553,8 +553,8 @@ static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti, /* the dm_target_io embedded in ci->io is available */ tio = &ci->io->tio; } else { - struct bio *clone = bio_alloc_bioset(NULL, 0, 0, gfp_mask, - &ci->io->md->bs); + struct bio *clone = bio_clone_fast(ci->bio, gfp_mask, + &ci->io->md->bs); if (!clone) return NULL; @@ -562,12 +562,6 @@ static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti, tio->inside_dm_io = false; } - if (__bio_clone_fast(&tio->clone, ci->bio, gfp_mask) < 0) { - if (ci->io->tio.io) - bio_put(&tio->clone); - return NULL; - } - tio->magic = DM_TIO_MAGIC; tio->io = ci->io; tio->ti = ti; -- GitLab From a0e8de798dd6710a69d69ec57b246a0e34c4a695 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 2 Feb 2022 17:01:08 +0100 Subject: [PATCH 0236/1586] block: initialize the target bio in __bio_clone_fast All callers of __bio_clone_fast initialize the bio first. Move that initialization into __bio_clone_fast instead. 
Signed-off-by: Christoph Hellwig Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20220202160109.108149-13-hch@lst.de Signed-off-by: Jens Axboe --- block/bio.c | 75 ++++++++++++++++++++----------------- drivers/md/bcache/request.c | 1 - drivers/md/md-multipath.c | 1 - 3 files changed, 40 insertions(+), 37 deletions(-) diff --git a/block/bio.c b/block/bio.c index 2a921875bb420..74f66e22ef630 100644 --- a/block/bio.c +++ b/block/bio.c @@ -728,37 +728,16 @@ void bio_put(struct bio *bio) } EXPORT_SYMBOL(bio_put); -/** - * __bio_clone_fast - clone a bio that shares the original bio's biovec - * @bio: destination bio - * @bio_src: bio to clone - * @gfp: allocation flags - * - * Clone a &bio. Caller will own the returned bio, but not - * the actual data it points to. Reference count of returned - * bio will be one. - * - * Caller must ensure that @bio_src is not freed before @bio. - */ -int __bio_clone_fast(struct bio *bio, struct bio *bio_src, gfp_t gfp) +static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp) { - WARN_ON_ONCE(bio->bi_pool && bio->bi_max_vecs); - - /* - * most users will be overriding ->bi_bdev with a new target, - * so we don't set nor calculate new physical/hw segment counts here - */ - bio->bi_bdev = bio_src->bi_bdev; bio_set_flag(bio, BIO_CLONED); if (bio_flagged(bio_src, BIO_THROTTLED)) bio_set_flag(bio, BIO_THROTTLED); if (bio_flagged(bio_src, BIO_REMAPPED)) bio_set_flag(bio, BIO_REMAPPED); - bio->bi_opf = bio_src->bi_opf; bio->bi_ioprio = bio_src->bi_ioprio; bio->bi_write_hint = bio_src->bi_write_hint; bio->bi_iter = bio_src->bi_iter; - bio->bi_io_vec = bio_src->bi_io_vec; bio_clone_blkg_association(bio, bio_src); blkcg_bio_issue_init(bio); @@ -770,33 +749,59 @@ int __bio_clone_fast(struct bio *bio, struct bio *bio_src, gfp_t gfp) return -ENOMEM; return 0; } -EXPORT_SYMBOL(__bio_clone_fast); /** - * bio_clone_fast - clone a bio that shares the original bio's biovec - * @bio: bio to clone - * @gfp_mask: allocation priority 
- * @bs: bio_set to allocate from + * bio_clone_fast - clone a bio that shares the original bio's biovec + * @bio_src: bio to clone from + * @gfp: allocation priority + * @bs: bio_set to allocate from + * + * Allocate a new bio that is a clone of @bio_src. The caller owns the returned + * bio, but not the actual data it points to. * - * Like __bio_clone_fast, only also allocates the returned bio + * The caller must ensure that the return bio is not freed before @bio_src. */ -struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs) +struct bio *bio_clone_fast(struct bio *bio_src, gfp_t gfp, struct bio_set *bs) { - struct bio *b; + struct bio *bio; - b = bio_alloc_bioset(NULL, 0, 0, gfp_mask, bs); - if (!b) + bio = bio_alloc_bioset(bio_src->bi_bdev, 0, bio_src->bi_opf, gfp, bs); + if (!bio) return NULL; - if (__bio_clone_fast(b, bio, gfp_mask < 0)) { - bio_put(b); + if (__bio_clone(bio, bio_src, gfp) < 0) { + bio_put(bio); return NULL; } + bio->bi_io_vec = bio_src->bi_io_vec; - return b; + return bio; } EXPORT_SYMBOL(bio_clone_fast); +/** + * __bio_clone_fast - clone a bio that shares the original bio's biovec + * @bio: bio to clone into + * @bio_src: bio to clone from + * @gfp: allocation priority + * + * Initialize a new bio in caller provided memory that is a clone of @bio_src. + * The caller owns the returned bio, but not the actual data it points to. + * + * The caller must ensure that @bio_src is not freed before @bio. 
+ */ +int __bio_clone_fast(struct bio *bio, struct bio *bio_src, gfp_t gfp) +{ + int ret; + + bio_init(bio, bio_src->bi_bdev, bio_src->bi_io_vec, 0, bio_src->bi_opf); + ret = __bio_clone(bio, bio_src, gfp); + if (ret) + bio_uninit(bio); + return ret; +} +EXPORT_SYMBOL(__bio_clone_fast); + const char *bio_devname(struct bio *bio, char *buf) { return bdevname(bio->bi_bdev, buf); diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 574b02b94f1a4..d2cb853bf9173 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -685,7 +685,6 @@ static void do_bio_hook(struct search *s, { struct bio *bio = &s->bio.bio; - bio_init(bio, NULL, NULL, 0, 0); __bio_clone_fast(bio, orig_bio, GFP_NOIO); /* * bi_end_io can be set separately somewhere else, e.g. the diff --git a/drivers/md/md-multipath.c b/drivers/md/md-multipath.c index 010c759c741ad..483a5500f83cd 100644 --- a/drivers/md/md-multipath.c +++ b/drivers/md/md-multipath.c @@ -121,7 +121,6 @@ static bool multipath_make_request(struct mddev *mddev, struct bio * bio) } multipath = conf->multipaths + mp_bh->path; - bio_init(&mp_bh->bio, NULL, NULL, 0, 0); __bio_clone_fast(&mp_bh->bio, bio, GFP_NOIO); mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset; -- GitLab From abfc426d1b2fb2176df59851a64223b58ddae7e7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 2 Feb 2022 17:01:09 +0100 Subject: [PATCH 0237/1586] block: pass a block_device to bio_clone_fast Pass a block_device to bio_clone_fast and __bio_clone_fast and give the functions more suitable names. 
Signed-off-by: Christoph Hellwig Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20220202160109.108149-14-hch@lst.de Signed-off-by: Jens Axboe --- Documentation/block/biodoc.rst | 5 ----- block/bio.c | 31 +++++++++++++++++------------ block/blk-mq.c | 4 ++-- block/bounce.c | 3 +-- drivers/block/drbd/drbd_req.c | 4 ++-- drivers/block/drbd/drbd_worker.c | 4 ++-- drivers/block/pktcdvd.c | 4 ++-- drivers/md/bcache/request.c | 5 +++-- drivers/md/dm-cache-target.c | 4 ++-- drivers/md/dm-crypt.c | 11 +++++------ drivers/md/dm-zoned-target.c | 3 +-- drivers/md/dm.c | 6 +++--- drivers/md/md-faulty.c | 4 ++-- drivers/md/md-multipath.c | 3 +-- drivers/md/md.c | 5 +++-- drivers/md/raid1.c | 34 ++++++++++++++++---------------- drivers/md/raid10.c | 16 +++++++-------- drivers/md/raid5.c | 4 ++-- fs/btrfs/extent_io.c | 4 ++-- include/linux/bio.h | 6 ++++-- 20 files changed, 80 insertions(+), 80 deletions(-) diff --git a/Documentation/block/biodoc.rst b/Documentation/block/biodoc.rst index 2098477851a4b..4fbc367e62f95 100644 --- a/Documentation/block/biodoc.rst +++ b/Documentation/block/biodoc.rst @@ -663,11 +663,6 @@ to i/o submission, if the bio fields are likely to be accessed after the i/o is issued (since the bio may otherwise get freed in case i/o completion happens in the meantime). -The bio_clone_fast() routine may be used to duplicate a bio, where the clone -shares the bio_vec_list with the original bio (i.e. both point to the -same bio_vec_list). This would typically be used for splitting i/o requests -in lvm or md. 
- 3.2 Generic bio helper Routines ------------------------------- diff --git a/block/bio.c b/block/bio.c index 74f66e22ef630..18d34b33351b8 100644 --- a/block/bio.c +++ b/block/bio.c @@ -733,7 +733,8 @@ static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp) bio_set_flag(bio, BIO_CLONED); if (bio_flagged(bio_src, BIO_THROTTLED)) bio_set_flag(bio, BIO_THROTTLED); - if (bio_flagged(bio_src, BIO_REMAPPED)) + if (bio->bi_bdev == bio_src->bi_bdev && + bio_flagged(bio_src, BIO_REMAPPED)) bio_set_flag(bio, BIO_REMAPPED); bio->bi_ioprio = bio_src->bi_ioprio; bio->bi_write_hint = bio_src->bi_write_hint; @@ -751,7 +752,8 @@ static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp) } /** - * bio_clone_fast - clone a bio that shares the original bio's biovec + * bio_alloc_clone - clone a bio that shares the original bio's biovec + * @bdev: block_device to clone onto * @bio_src: bio to clone from * @gfp: allocation priority * @bs: bio_set to allocate from @@ -761,11 +763,12 @@ static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp) * * The caller must ensure that the return bio is not freed before @bio_src. 
*/ -struct bio *bio_clone_fast(struct bio *bio_src, gfp_t gfp, struct bio_set *bs) +struct bio *bio_alloc_clone(struct block_device *bdev, struct bio *bio_src, + gfp_t gfp, struct bio_set *bs) { struct bio *bio; - bio = bio_alloc_bioset(bio_src->bi_bdev, 0, bio_src->bi_opf, gfp, bs); + bio = bio_alloc_bioset(bdev, 0, bio_src->bi_opf, gfp, bs); if (!bio) return NULL; @@ -777,10 +780,11 @@ struct bio *bio_clone_fast(struct bio *bio_src, gfp_t gfp, struct bio_set *bs) return bio; } -EXPORT_SYMBOL(bio_clone_fast); +EXPORT_SYMBOL(bio_alloc_clone); /** - * __bio_clone_fast - clone a bio that shares the original bio's biovec + * bio_init_clone - clone a bio that shares the original bio's biovec + * @bdev: block_device to clone onto * @bio: bio to clone into * @bio_src: bio to clone from * @gfp: allocation priority @@ -790,17 +794,18 @@ EXPORT_SYMBOL(bio_clone_fast); * * The caller must ensure that @bio_src is not freed before @bio. */ -int __bio_clone_fast(struct bio *bio, struct bio *bio_src, gfp_t gfp) +int bio_init_clone(struct block_device *bdev, struct bio *bio, + struct bio *bio_src, gfp_t gfp) { int ret; - bio_init(bio, bio_src->bi_bdev, bio_src->bi_io_vec, 0, bio_src->bi_opf); + bio_init(bio, bdev, bio_src->bi_io_vec, 0, bio_src->bi_opf); ret = __bio_clone(bio, bio_src, gfp); if (ret) bio_uninit(bio); return ret; } -EXPORT_SYMBOL(__bio_clone_fast); +EXPORT_SYMBOL(bio_init_clone); const char *bio_devname(struct bio *bio, char *buf) { @@ -1572,7 +1577,7 @@ struct bio *bio_split(struct bio *bio, int sectors, if (WARN_ON_ONCE(bio_op(bio) == REQ_OP_ZONE_APPEND)) return NULL; - split = bio_clone_fast(bio, gfp, bs); + split = bio_alloc_clone(bio->bi_bdev, bio, gfp, bs); if (!split) return NULL; @@ -1667,9 +1672,9 @@ EXPORT_SYMBOL(bioset_exit); * Note that the bio must be embedded at the END of that structure always, * or things will break badly. * If %BIOSET_NEED_BVECS is set in @flags, a separate pool will be allocated - * for allocating iovecs. 
This pool is not needed e.g. for bio_clone_fast(). - * If %BIOSET_NEED_RESCUER is set, a workqueue is created which can be used to - * dispatch queued requests when the mempool runs out of space. + * for allocating iovecs. This pool is not needed e.g. for bio_init_clone(). + * If %BIOSET_NEED_RESCUER is set, a workqueue is created which can be used + * to dispatch queued requests when the mempool runs out of space. * */ int bioset_init(struct bio_set *bs, diff --git a/block/blk-mq.c b/block/blk-mq.c index 1adfe4824ef5e..4b868e792ba4a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2975,10 +2975,10 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src, bs = &fs_bio_set; __rq_for_each_bio(bio_src, rq_src) { - bio = bio_clone_fast(bio_src, gfp_mask, bs); + bio = bio_alloc_clone(rq->q->disk->part0, bio_src, gfp_mask, + bs); if (!bio) goto free_and_out; - bio->bi_bdev = rq->q->disk->part0; if (bio_ctr && bio_ctr(bio, bio_src, data)) goto free_and_out; diff --git a/block/bounce.c b/block/bounce.c index 330ddde25b460..3fd3bc6fd5dbb 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -162,8 +162,7 @@ static struct bio *bounce_clone_bio(struct bio *bio_src) * that does not own the bio - reason being drivers don't use it for * iterating over the biovec anymore, so expecting it to be kept up * to date (i.e. for clones that share the parent biovec) is just - * asking for trouble and would force extra work on - * __bio_clone_fast() anyways. + * asking for trouble and would force extra work. 
*/ bio = bio_alloc_bioset(bio_src->bi_bdev, bio_segments(bio_src), bio_src->bi_opf, GFP_NOIO, &bounce_bio_set); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 8d44e96c4c4ef..c00ae8619519e 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -30,8 +30,8 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device, struct bio return NULL; memset(req, 0, sizeof(*req)); - req->private_bio = bio_clone_fast(bio_src, GFP_NOIO, &drbd_io_bio_set); - bio_set_dev(req->private_bio, device->ldev->backing_bdev); + req->private_bio = bio_alloc_clone(device->ldev->backing_bdev, bio_src, + GFP_NOIO, &drbd_io_bio_set); req->private_bio->bi_private = req; req->private_bio->bi_end_io = drbd_request_endio; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 64563bfdf0da0..a5e04b38006b6 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1523,9 +1523,9 @@ int w_restart_disk_io(struct drbd_work *w, int cancel) if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG) drbd_al_begin_io(device, &req->i); - req->private_bio = bio_clone_fast(req->master_bio, GFP_NOIO, + req->private_bio = bio_alloc_clone(device->ldev->backing_bdev, + req->master_bio, GFP_NOIO, &drbd_io_bio_set); - bio_set_dev(req->private_bio, device->ldev->backing_bdev); req->private_bio->bi_private = req; req->private_bio->bi_end_io = drbd_request_endio; submit_bio_noacct(req->private_bio); diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 3aa5954429462..be749c686feb7 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2294,12 +2294,12 @@ static void pkt_end_io_read_cloned(struct bio *bio) static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio) { - struct bio *cloned_bio = bio_clone_fast(bio, GFP_NOIO, &pkt_bio_set); + struct bio *cloned_bio = + bio_alloc_clone(pd->bdev, bio, GFP_NOIO, &pkt_bio_set); 
struct packet_stacked_data *psd = mempool_alloc(&psd_pool, GFP_NOIO); psd->pd = pd; psd->bio = bio; - bio_set_dev(cloned_bio, pd->bdev); cloned_bio->bi_private = psd; cloned_bio->bi_end_io = pkt_end_io_read_cloned; pd->stats.secs_r += bio_sectors(bio); diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index d2cb853bf9173..6869e010475a3 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -685,7 +685,7 @@ static void do_bio_hook(struct search *s, { struct bio *bio = &s->bio.bio; - __bio_clone_fast(bio, orig_bio, GFP_NOIO); + bio_init_clone(bio->bi_bdev, bio, orig_bio, GFP_NOIO); /* * bi_end_io can be set separately somewhere else, e.g. the * variants in, @@ -1036,7 +1036,8 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s) closure_bio_submit(s->iop.c, flush, cl); } } else { - s->iop.bio = bio_clone_fast(bio, GFP_NOIO, &dc->disk.bio_split); + s->iop.bio = bio_alloc_clone(bio->bi_bdev, bio, GFP_NOIO, + &dc->disk.bio_split); /* I/O request sent to backing device */ bio->bi_end_io = backing_request_endio; closure_bio_submit(s->iop.c, bio, cl); diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 1c37fe028e531..89fdfb49d564e 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -819,13 +819,13 @@ static void issue_op(struct bio *bio, void *context) static void remap_to_origin_and_cache(struct cache *cache, struct bio *bio, dm_oblock_t oblock, dm_cblock_t cblock) { - struct bio *origin_bio = bio_clone_fast(bio, GFP_NOIO, &cache->bs); + struct bio *origin_bio = bio_alloc_clone(cache->origin_dev->bdev, bio, + GFP_NOIO, &cache->bs); BUG_ON(!origin_bio); bio_chain(origin_bio, bio); - remap_to_origin(cache, origin_bio); if (bio_data_dir(origin_bio) == WRITE) clear_discard(cache, oblock_to_dblock(cache, oblock)); submit_bio(origin_bio); diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index f7e4435b7439a..a5006cb6ee8ad 100644 --- 
a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1834,17 +1834,16 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) struct bio *clone; /* - * We need the original biovec array in order to decrypt - * the whole bio data *afterwards* -- thanks to immutable - * biovecs we don't need to worry about the block layer - * modifying the biovec array; so leverage bio_clone_fast(). + * We need the original biovec array in order to decrypt the whole bio + * data *afterwards* -- thanks to immutable biovecs we don't need to + * worry about the block layer modifying the biovec array; so leverage + * bio_alloc_clone(). */ - clone = bio_clone_fast(io->base_bio, gfp, &cc->bs); + clone = bio_alloc_clone(cc->dev->bdev, io->base_bio, gfp, &cc->bs); if (!clone) return 1; clone->bi_private = io; clone->bi_end_io = crypt_endio; - bio_set_dev(clone, cc->dev->bdev); crypt_inc_pending(io); diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index 166c4e9d99c97..a3f6d3ef38174 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -125,11 +125,10 @@ static int dmz_submit_bio(struct dmz_target *dmz, struct dm_zone *zone, if (dev->flags & DMZ_BDEV_DYING) return -EIO; - clone = bio_clone_fast(bio, GFP_NOIO, &dmz->bio_set); + clone = bio_alloc_clone(dev->bdev, bio, GFP_NOIO, &dmz->bio_set); if (!clone) return -ENOMEM; - bio_set_dev(clone, dev->bdev); bioctx->dev = dev; clone->bi_iter.bi_sector = dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 862564a5df74b..ab9cc91931f99 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -520,7 +520,7 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) struct dm_target_io *tio; struct bio *clone; - clone = bio_clone_fast(bio, GFP_NOIO, &md->io_bs); + clone = bio_alloc_clone(bio->bi_bdev, bio, GFP_NOIO, &md->io_bs); tio = clone_to_tio(clone); tio->inside_dm_io = true; @@ -553,8 +553,8 @@ static struct 
bio *alloc_tio(struct clone_info *ci, struct dm_target *ti, /* the dm_target_io embedded in ci->io is available */ tio = &ci->io->tio; } else { - struct bio *clone = bio_clone_fast(ci->bio, gfp_mask, - &ci->io->md->bs); + struct bio *clone = bio_alloc_clone(ci->bio->bi_bdev, ci->bio, + gfp_mask, &ci->io->md->bs); if (!clone) return NULL; diff --git a/drivers/md/md-faulty.c b/drivers/md/md-faulty.c index c0dc6f2ef4a3d..50ad818978a43 100644 --- a/drivers/md/md-faulty.c +++ b/drivers/md/md-faulty.c @@ -205,9 +205,9 @@ static bool faulty_make_request(struct mddev *mddev, struct bio *bio) } } if (failit) { - struct bio *b = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set); + struct bio *b = bio_alloc_clone(conf->rdev->bdev, bio, GFP_NOIO, + &mddev->bio_set); - bio_set_dev(b, conf->rdev->bdev); b->bi_private = bio; b->bi_end_io = faulty_fail; bio = b; diff --git a/drivers/md/md-multipath.c b/drivers/md/md-multipath.c index 483a5500f83cd..97fb948e3e741 100644 --- a/drivers/md/md-multipath.c +++ b/drivers/md/md-multipath.c @@ -121,10 +121,9 @@ static bool multipath_make_request(struct mddev *mddev, struct bio * bio) } multipath = conf->multipaths + mp_bh->path; - __bio_clone_fast(&mp_bh->bio, bio, GFP_NOIO); + bio_init_clone(multipath->rdev->bdev, &mp_bh->bio, bio, GFP_NOIO); mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset; - bio_set_dev(&mp_bh->bio, multipath->rdev->bdev); mp_bh->bio.bi_opf |= REQ_FAILFAST_TRANSPORT; mp_bh->bio.bi_end_io = multipath_end_request; mp_bh->bio.bi_private = mp_bh; diff --git a/drivers/md/md.c b/drivers/md/md.c index 0a89f072dae0d..f88a9e948f3eb 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8634,13 +8634,14 @@ static void md_end_io_acct(struct bio *bio) */ void md_account_bio(struct mddev *mddev, struct bio **bio) { + struct block_device *bdev = (*bio)->bi_bdev; struct md_io_acct *md_io_acct; struct bio *clone; - if (!blk_queue_io_stat((*bio)->bi_bdev->bd_disk->queue)) + if (!blk_queue_io_stat(bdev->bd_disk->queue)) return; 
- clone = bio_clone_fast(*bio, GFP_NOIO, &mddev->io_acct_set); + clone = bio_alloc_clone(bdev, *bio, GFP_NOIO, &mddev->io_acct_set); md_io_acct = container_of(clone, struct md_io_acct, bio_clone); md_io_acct->orig_bio = *bio; md_io_acct->start_time = bio_start_io_acct(*bio); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index e7710fb5befb4..c3288d46948de 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1320,13 +1320,13 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, if (!r1bio_existed && blk_queue_io_stat(bio->bi_bdev->bd_disk->queue)) r1_bio->start_time = bio_start_io_acct(bio); - read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set); + read_bio = bio_alloc_clone(mirror->rdev->bdev, bio, gfp, + &mddev->bio_set); r1_bio->bios[rdisk] = read_bio; read_bio->bi_iter.bi_sector = r1_bio->sector + mirror->rdev->data_offset; - bio_set_dev(read_bio, mirror->rdev->bdev); read_bio->bi_end_io = raid1_end_read_request; bio_set_op_attrs(read_bio, op, do_sync); if (test_bit(FailFast, &mirror->rdev->flags) && @@ -1546,24 +1546,25 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, first_clone = 0; } - if (r1_bio->behind_master_bio) - mbio = bio_clone_fast(r1_bio->behind_master_bio, - GFP_NOIO, &mddev->bio_set); - else - mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set); - if (r1_bio->behind_master_bio) { + mbio = bio_alloc_clone(rdev->bdev, + r1_bio->behind_master_bio, + GFP_NOIO, &mddev->bio_set); if (test_bit(CollisionCheck, &rdev->flags)) wait_for_serialization(rdev, r1_bio); if (test_bit(WriteMostly, &rdev->flags)) atomic_inc(&r1_bio->behind_remaining); - } else if (mddev->serialize_policy) - wait_for_serialization(rdev, r1_bio); + } else { + mbio = bio_alloc_clone(rdev->bdev, bio, GFP_NOIO, + &mddev->bio_set); + + if (mddev->serialize_policy) + wait_for_serialization(rdev, r1_bio); + } r1_bio->bios[i] = mbio; mbio->bi_iter.bi_sector = (r1_bio->sector + rdev->data_offset); - bio_set_dev(mbio, rdev->bdev); 
mbio->bi_end_io = raid1_end_write_request; mbio->bi_opf = bio_op(bio) | (bio->bi_opf & (REQ_SYNC | REQ_FUA)); if (test_bit(FailFast, &rdev->flags) && @@ -2416,12 +2417,12 @@ static int narrow_write_error(struct r1bio *r1_bio, int i) /* Write at 'sector' for 'sectors'*/ if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { - wbio = bio_clone_fast(r1_bio->behind_master_bio, - GFP_NOIO, - &mddev->bio_set); + wbio = bio_alloc_clone(rdev->bdev, + r1_bio->behind_master_bio, + GFP_NOIO, &mddev->bio_set); } else { - wbio = bio_clone_fast(r1_bio->master_bio, GFP_NOIO, - &mddev->bio_set); + wbio = bio_alloc_clone(rdev->bdev, r1_bio->master_bio, + GFP_NOIO, &mddev->bio_set); } bio_set_op_attrs(wbio, REQ_OP_WRITE, 0); @@ -2430,7 +2431,6 @@ static int narrow_write_error(struct r1bio *r1_bio, int i) bio_trim(wbio, sector - r1_bio->sector, sectors); wbio->bi_iter.bi_sector += rdev->data_offset; - bio_set_dev(wbio, rdev->bdev); if (submit_bio_wait(wbio) < 0) /* failure! */ diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index da07bcbc06d08..5dd2e17e1d0ea 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1208,14 +1208,13 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue)) r10_bio->start_time = bio_start_io_acct(bio); - read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set); + read_bio = bio_alloc_clone(rdev->bdev, bio, gfp, &mddev->bio_set); r10_bio->devs[slot].bio = read_bio; r10_bio->devs[slot].rdev = rdev; read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr + choose_data_offset(r10_bio, rdev); - bio_set_dev(read_bio, rdev->bdev); read_bio->bi_end_io = raid10_end_read_request; bio_set_op_attrs(read_bio, op, do_sync); if (test_bit(FailFast, &rdev->flags) && @@ -1255,7 +1254,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, } else rdev = conf->mirrors[devnum].rdev; - mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set); + mbio = bio_alloc_clone(rdev->bdev, 
bio, GFP_NOIO, &mddev->bio_set); if (replacement) r10_bio->devs[n_copy].repl_bio = mbio; else @@ -1263,7 +1262,6 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, mbio->bi_iter.bi_sector = (r10_bio->devs[n_copy].addr + choose_data_offset(r10_bio, rdev)); - bio_set_dev(mbio, rdev->bdev); mbio->bi_end_io = raid10_end_write_request; bio_set_op_attrs(mbio, op, do_sync | do_fua); if (!replacement && test_bit(FailFast, @@ -1812,7 +1810,8 @@ retry_discard: */ if (r10_bio->devs[disk].bio) { struct md_rdev *rdev = conf->mirrors[disk].rdev; - mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set); + mbio = bio_alloc_clone(bio->bi_bdev, bio, GFP_NOIO, + &mddev->bio_set); mbio->bi_end_io = raid10_end_discard_request; mbio->bi_private = r10_bio; r10_bio->devs[disk].bio = mbio; @@ -1825,7 +1824,8 @@ retry_discard: } if (r10_bio->devs[disk].repl_bio) { struct md_rdev *rrdev = conf->mirrors[disk].replacement; - rbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set); + rbio = bio_alloc_clone(bio->bi_bdev, bio, GFP_NOIO, + &mddev->bio_set); rbio->bi_end_io = raid10_end_discard_request; rbio->bi_private = r10_bio; r10_bio->devs[disk].repl_bio = rbio; @@ -2892,12 +2892,12 @@ static int narrow_write_error(struct r10bio *r10_bio, int i) if (sectors > sect_to_write) sectors = sect_to_write; /* Write at 'sector' for 'sectors' */ - wbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set); + wbio = bio_alloc_clone(rdev->bdev, bio, GFP_NOIO, + &mddev->bio_set); bio_trim(wbio, sector - bio->bi_iter.bi_sector, sectors); wsector = r10_bio->devs[i].addr + (sector - r10_bio->sector); wbio->bi_iter.bi_sector = wsector + choose_data_offset(r10_bio, rdev); - bio_set_dev(wbio, rdev->bdev); bio_set_op_attrs(wbio, REQ_OP_WRITE, 0); if (submit_bio_wait(wbio) < 0) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7c119208a2143..8891aaba65964 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5438,14 +5438,14 @@ static int raid5_read_one_chunk(struct mddev 
*mddev, struct bio *raid_bio) return 0; } - align_bio = bio_clone_fast(raid_bio, GFP_NOIO, &mddev->io_acct_set); + align_bio = bio_alloc_clone(rdev->bdev, raid_bio, GFP_NOIO, + &mddev->io_acct_set); md_io_acct = container_of(align_bio, struct md_io_acct, bio_clone); raid_bio->bi_next = (void *)rdev; if (blk_queue_io_stat(raid_bio->bi_bdev->bd_disk->queue)) md_io_acct->start_time = bio_start_io_acct(raid_bio); md_io_acct->orig_bio = raid_bio; - bio_set_dev(align_bio, rdev->bdev); align_bio->bi_end_io = raid5_align_endio; align_bio->bi_private = md_io_acct; align_bio->bi_iter.bi_sector = sector; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 421d921a05716..dee86911a4bef 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3154,7 +3154,7 @@ struct bio *btrfs_bio_clone(struct bio *bio) struct bio *new; /* Bio allocation backed by a bioset does not fail */ - new = bio_clone_fast(bio, GFP_NOFS, &btrfs_bioset); + new = bio_alloc_clone(bio->bi_bdev, bio, GFP_NOFS, &btrfs_bioset); bbio = btrfs_bio(new); btrfs_bio_init(bbio); bbio->iter = bio->bi_iter; @@ -3169,7 +3169,7 @@ struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size) ASSERT(offset <= UINT_MAX && size <= UINT_MAX); /* this will never fail when it's backed by a bioset */ - bio = bio_clone_fast(orig, GFP_NOFS, &btrfs_bioset); + bio = bio_alloc_clone(orig->bi_bdev, orig, GFP_NOFS, &btrfs_bioset); ASSERT(bio); bbio = btrfs_bio(bio); diff --git a/include/linux/bio.h b/include/linux/bio.h index b814361c957b0..7523aba4ddf7c 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -413,8 +413,10 @@ struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev, struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs); extern void bio_put(struct bio *); -int __bio_clone_fast(struct bio *bio, struct bio *bio_src, gfp_t gfp); -extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *); +struct bio *bio_alloc_clone(struct block_device 
*bdev, struct bio *bio_src, + gfp_t gfp, struct bio_set *bs); +int bio_init_clone(struct block_device *bdev, struct bio *bio, + struct bio *bio_src, gfp_t gfp); extern struct bio_set fs_bio_set; -- GitLab From b9794a822281944ef3de5b1812a94cbdb8134320 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 28 Jan 2022 17:35:33 +0100 Subject: [PATCH 0238/1586] powercap/drivers/dtpm: Convert the init table section to a simple array The init table section is freed after the system has booted. However the next changes will make the DTPM description per module, so the table won't be accessible when the module is loaded. In order to fix that, we should move the table to the data section where there are very few entries, and that makes it strange to add it there. The main goal of the table was to keep the code self-encapsulated and we can keep it almost as is by using an array instead. Suggested-by: Ulf Hansson Reviewed-by: Ulf Hansson Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20220128163537.212248-2-daniel.lezcano@linaro.org --- drivers/powercap/dtpm.c | 2 ++ drivers/powercap/dtpm_cpu.c | 5 ++++- drivers/powercap/dtpm_subsys.h | 18 ++++++++++++++++++ include/asm-generic/vmlinux.lds.h | 11 ----------- include/linux/dtpm.h | 24 +++--------------------- 5 files changed, 27 insertions(+), 33 deletions(-) create mode 100644 drivers/powercap/dtpm_subsys.h diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c index 8cb45f2d3d78d..0e5c93443c707 100644 --- a/drivers/powercap/dtpm.c +++ b/drivers/powercap/dtpm.c @@ -24,6 +24,8 @@ #include #include +#include "dtpm_subsys.h" + #define DTPM_POWER_LIMIT_FLAG 0 static const char *constraint_name[] = { diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c index b740866b228d9..5763e0ce2af56 100644 --- a/drivers/powercap/dtpm_cpu.c +++ b/drivers/powercap/dtpm_cpu.c @@ -269,4 +269,7 @@ static int __init dtpm_cpu_init(void) return 0; } -DTPM_DECLARE(dtpm_cpu, dtpm_cpu_init); +struct dtpm_subsys_ops
dtpm_cpu_ops = { + .name = KBUILD_MODNAME, + .init = dtpm_cpu_init, +}; diff --git a/drivers/powercap/dtpm_subsys.h b/drivers/powercap/dtpm_subsys.h new file mode 100644 index 0000000000000..2a3a2055f60ed --- /dev/null +++ b/drivers/powercap/dtpm_subsys.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2022 Linaro Ltd + * + * Author: Daniel Lezcano + */ +#ifndef ___DTPM_SUBSYS_H__ +#define ___DTPM_SUBSYS_H__ + +extern struct dtpm_subsys_ops dtpm_cpu_ops; + +struct dtpm_subsys_ops *dtpm_subsys[] = { +#ifdef CONFIG_DTPM_CPU + &dtpm_cpu_ops, +#endif +}; + +#endif diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 42f3866bca697..2a10db2f0bc5c 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -321,16 +321,6 @@ #define THERMAL_TABLE(name) #endif -#ifdef CONFIG_DTPM -#define DTPM_TABLE() \ - . = ALIGN(8); \ - __dtpm_table = .; \ - KEEP(*(__dtpm_table)) \ - __dtpm_table_end = .; -#else -#define DTPM_TABLE() -#endif - #define KERNEL_DTB() \ STRUCT_ALIGN(); \ __dtb_start = .; \ @@ -723,7 +713,6 @@ ACPI_PROBE_TABLE(irqchip) \ ACPI_PROBE_TABLE(timer) \ THERMAL_TABLE(governor) \ - DTPM_TABLE() \ EARLYCON_TABLE() \ LSM_TABLE() \ EARLY_LSM_TABLE() \ diff --git a/include/linux/dtpm.h b/include/linux/dtpm.h index d37e5d06a357d..506048158a506 100644 --- a/include/linux/dtpm.h +++ b/include/linux/dtpm.h @@ -32,29 +32,11 @@ struct dtpm_ops { void (*release)(struct dtpm *); }; -typedef int (*dtpm_init_t)(void); - -struct dtpm_descr { - dtpm_init_t init; +struct dtpm_subsys_ops { + const char *name; + int (*init)(void); }; -/* Init section thermal table */ -extern struct dtpm_descr __dtpm_table[]; -extern struct dtpm_descr __dtpm_table_end[]; - -#define DTPM_TABLE_ENTRY(name, __init) \ - static struct dtpm_descr __dtpm_table_entry_##name \ - __used __section("__dtpm_table") = { \ - .init = __init, \ - } - -#define DTPM_DECLARE(name, init) DTPM_TABLE_ENTRY(name, init) - 
-#define for_each_dtpm_table(__dtpm) \ - for (__dtpm = __dtpm_table; \ - __dtpm < __dtpm_table_end; \ - __dtpm++) - static inline struct dtpm *to_dtpm(struct powercap_zone *zone) { return container_of(zone, struct dtpm, zone); -- GitLab From 3759ec678e8944dc2ea70cab77a300408f78ae27 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 28 Jan 2022 17:35:34 +0100 Subject: [PATCH 0239/1586] powercap/drivers/dtpm: Add hierarchy creation The DTPM framework is available but without a way to configure it. This change provides a way to create a hierarchy of DTPM nodes where the power consumption reflects the sum of the children's power consumption. It is up to the platform to specify an array of dtpm nodes where each element has a pointer to its parent, except the topmost one. The type of the node indicates which initialization callback to call. At this time, we can create a virtual node, whose purpose is to be a parent in the hierarchy, and a DT node where the name describes its path. In order to ensure a nice self-encapsulation, the DTPM subsys array contains a couple of initialization functions, one to set up the DTPM backend and one to initialize it. With this approach, the DTPM framework has very little material to export. Signed-off-by: Daniel Lezcano Reviewed-by: Ulf Hansson Link: https://lore.kernel.org/r/20220128163537.212248-3-daniel.lezcano@linaro.org --- drivers/powercap/Kconfig | 1 + drivers/powercap/dtpm.c | 190 ++++++++++++++++++++++++++++++++++++++- include/linux/dtpm.h | 15 ++++ 3 files changed, 203 insertions(+), 3 deletions(-) diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig index 8242e8c5ed77e..b1ca339957e3d 100644 --- a/drivers/powercap/Kconfig +++ b/drivers/powercap/Kconfig @@ -46,6 +46,7 @@ config IDLE_INJECT config DTPM bool "Power capping for Dynamic Thermal Power Management (EXPERIMENTAL)" + depends on OF help This enables support for the power capping for the dynamic thermal power management userspace engine.
diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c index 0e5c93443c707..414826a1509b6 100644 --- a/drivers/powercap/dtpm.c +++ b/drivers/powercap/dtpm.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "dtpm_subsys.h" @@ -463,14 +464,197 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent) return 0; } -static int __init init_dtpm(void) +static struct dtpm *dtpm_setup_virtual(const struct dtpm_node *hierarchy, + struct dtpm *parent) { + struct dtpm *dtpm; + int ret; + + dtpm = kzalloc(sizeof(*dtpm), GFP_KERNEL); + if (!dtpm) + return ERR_PTR(-ENOMEM); + dtpm_init(dtpm, NULL); + + ret = dtpm_register(hierarchy->name, dtpm, parent); + if (ret) { + pr_err("Failed to register dtpm node '%s': %d\n", + hierarchy->name, ret); + kfree(dtpm); + return ERR_PTR(ret); + } + + return dtpm; +} + +static struct dtpm *dtpm_setup_dt(const struct dtpm_node *hierarchy, + struct dtpm *parent) +{ + struct device_node *np; + int i, ret; + + np = of_find_node_by_path(hierarchy->name); + if (!np) { + pr_err("Failed to find '%s'\n", hierarchy->name); + return ERR_PTR(-ENXIO); + } + + for (i = 0; i < ARRAY_SIZE(dtpm_subsys); i++) { + + if (!dtpm_subsys[i]->setup) + continue; + + ret = dtpm_subsys[i]->setup(parent, np); + if (ret) { + pr_err("Failed to setup '%s': %d\n", dtpm_subsys[i]->name, ret); + of_node_put(np); + return ERR_PTR(ret); + } + } + + of_node_put(np); + + /* + * By returning a NULL pointer, we let know the caller there + * is no child for us as we are a leaf of the tree + */ + return NULL; +} + +typedef struct dtpm * (*dtpm_node_callback_t)(const struct dtpm_node *, struct dtpm *); + +dtpm_node_callback_t dtpm_node_callback[] = { + [DTPM_NODE_VIRTUAL] = dtpm_setup_virtual, + [DTPM_NODE_DT] = dtpm_setup_dt, +}; + +static int dtpm_for_each_child(const struct dtpm_node *hierarchy, + const struct dtpm_node *it, struct dtpm *parent) +{ + struct dtpm *dtpm; + int i, ret; + + for (i = 0; hierarchy[i].name; i++) { + + if 
(hierarchy[i].parent != it) + continue; + + dtpm = dtpm_node_callback[hierarchy[i].type](&hierarchy[i], parent); + + /* + * A NULL pointer means there is no children, hence we + * continue without going deeper in the recursivity. + */ + if (!dtpm) + continue; + + /* + * There are multiple reasons why the callback could + * fail. The generic glue is abstracting the backend + * and therefore it is not possible to report back or + * take a decision based on the error. In any case, + * if this call fails, it is not critical in the + * hierarchy creation, we can assume the underlying + * service is not found, so we continue without this + * branch in the tree but with a warning to log the + * information the node was not created. + */ + if (IS_ERR(dtpm)) { + pr_warn("Failed to create '%s' in the hierarchy\n", + hierarchy[i].name); + continue; + } + + ret = dtpm_for_each_child(hierarchy, &hierarchy[i], dtpm); + if (ret) + return ret; + } + + return 0; +} + +/** + * dtpm_create_hierarchy - Create the dtpm hierarchy + * @hierarchy: An array of struct dtpm_node describing the hierarchy + * + * The function is called by the platform specific code with the + * description of the different node in the hierarchy. It creates the + * tree in the sysfs filesystem under the powercap dtpm entry. + * + * The expected tree has the format: + * + * struct dtpm_node hierarchy[] = { + * [0] { .name = "topmost", type = DTPM_NODE_VIRTUAL }, + * [1] { .name = "package", .type = DTPM_NODE_VIRTUAL, .parent = &hierarchy[0] }, + * [2] { .name = "/cpus/cpu0", .type = DTPM_NODE_DT, .parent = &hierarchy[1] }, + * [3] { .name = "/cpus/cpu1", .type = DTPM_NODE_DT, .parent = &hierarchy[1] }, + * [4] { .name = "/cpus/cpu2", .type = DTPM_NODE_DT, .parent = &hierarchy[1] }, + * [5] { .name = "/cpus/cpu3", .type = DTPM_NODE_DT, .parent = &hierarchy[1] }, + * [6] { } + * }; + * + * The last element is always an empty one and marks the end of the + * array. 
+ * + * Return: zero on success, a negative value in case of error. Errors + * are reported back from the underlying functions. + */ +int dtpm_create_hierarchy(struct of_device_id *dtpm_match_table) +{ + const struct of_device_id *match; + const struct dtpm_node *hierarchy; + struct device_node *np; + int i, ret; + + if (pct) + return -EBUSY; + pct = powercap_register_control_type(NULL, "dtpm", NULL); if (IS_ERR(pct)) { pr_err("Failed to register control type\n"); - return PTR_ERR(pct); + ret = PTR_ERR(pct); + goto out_pct; + } + + ret = -ENODEV; + np = of_find_node_by_path("/"); + if (!np) + goto out_err; + + match = of_match_node(dtpm_match_table, np); + + of_node_put(np); + + if (!match) + goto out_err; + + hierarchy = match->data; + if (!hierarchy) { + ret = -EFAULT; + goto out_err; + } + + ret = dtpm_for_each_child(hierarchy, NULL, NULL); + if (ret) + goto out_err; + + for (i = 0; i < ARRAY_SIZE(dtpm_subsys); i++) { + + if (!dtpm_subsys[i]->init) + continue; + + ret = dtpm_subsys[i]->init(); + if (ret) + pr_info("Failed to initialze '%s': %d", + dtpm_subsys[i]->name, ret); } return 0; + +out_err: + powercap_unregister_control_type(pct); +out_pct: + pct = NULL; + + return ret; } -late_initcall(init_dtpm); +EXPORT_SYMBOL_GPL(dtpm_create_hierarchy); diff --git a/include/linux/dtpm.h b/include/linux/dtpm.h index 506048158a506..f7a25c70dd4c0 100644 --- a/include/linux/dtpm.h +++ b/include/linux/dtpm.h @@ -32,9 +32,23 @@ struct dtpm_ops { void (*release)(struct dtpm *); }; +struct device_node; + struct dtpm_subsys_ops { const char *name; int (*init)(void); + int (*setup)(struct dtpm *, struct device_node *); +}; + +enum DTPM_NODE_TYPE { + DTPM_NODE_VIRTUAL = 0, + DTPM_NODE_DT, +}; + +struct dtpm_node { + enum DTPM_NODE_TYPE type; + const char *name; + struct dtpm_node *parent; }; static inline struct dtpm *to_dtpm(struct powercap_zone *zone) @@ -52,4 +66,5 @@ void dtpm_unregister(struct dtpm *dtpm); int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm 
*parent); +int dtpm_create_hierarchy(struct of_device_id *dtpm_match_table); #endif -- GitLab From 73dbcb6e37bf0c43bac8c15fe5bcab2bec2367fb Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 28 Jan 2022 17:35:35 +0100 Subject: [PATCH 0240/1586] powercap/drivers/dtpm: Add CPU DT initialization support Based on the previous DT changes in the core code, use the 'setup' callback to initialize the CPU DTPM backend. Code is reorganized to stick to the DTPM table description. No functional changes. Signed-off-by: Daniel Lezcano Reviewed-by: Ulf Hansson Link: https://lore.kernel.org/r/20220128163537.212248-4-daniel.lezcano@linaro.org --- drivers/powercap/dtpm_cpu.c | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c index 5763e0ce2af56..eed5ad688d467 100644 --- a/drivers/powercap/dtpm_cpu.c +++ b/drivers/powercap/dtpm_cpu.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -176,6 +177,17 @@ static int cpuhp_dtpm_cpu_offline(unsigned int cpu) } static int cpuhp_dtpm_cpu_online(unsigned int cpu) +{ + struct dtpm_cpu *dtpm_cpu; + + dtpm_cpu = per_cpu(dtpm_per_cpu, cpu); + if (dtpm_cpu) + return dtpm_update_power(&dtpm_cpu->dtpm); + + return 0; +} + +static int __dtpm_cpu_setup(int cpu, struct dtpm *parent) { struct dtpm_cpu *dtpm_cpu; struct cpufreq_policy *policy; @@ -183,6 +195,10 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu) char name[CPUFREQ_NAME_LEN]; int ret = -ENOMEM; + dtpm_cpu = per_cpu(dtpm_per_cpu, cpu); + if (dtpm_cpu) + return 0; + policy = cpufreq_cpu_get(cpu); if (!policy) return 0; @@ -191,10 +207,6 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu) if (!pd) return -EINVAL; - dtpm_cpu = per_cpu(dtpm_per_cpu, cpu); - if (dtpm_cpu) - return dtpm_update_power(&dtpm_cpu->dtpm); - dtpm_cpu = kzalloc(sizeof(*dtpm_cpu), GFP_KERNEL); if (!dtpm_cpu) return -ENOMEM; @@ -207,7 +219,7 @@ static int 
cpuhp_dtpm_cpu_online(unsigned int cpu) snprintf(name, sizeof(name), "cpu%d-cpufreq", dtpm_cpu->cpu); - ret = dtpm_register(name, &dtpm_cpu->dtpm, NULL); + ret = dtpm_register(name, &dtpm_cpu->dtpm, parent); if (ret) goto out_kfree_dtpm_cpu; @@ -231,7 +243,18 @@ out_kfree_dtpm_cpu: return ret; } -static int __init dtpm_cpu_init(void) +static int dtpm_cpu_setup(struct dtpm *dtpm, struct device_node *np) +{ + int cpu; + + cpu = of_cpu_node_to_id(np); + if (cpu < 0) + return 0; + + return __dtpm_cpu_setup(cpu, dtpm); +} + +static int dtpm_cpu_init(void) { int ret; @@ -272,4 +295,5 @@ static int __init dtpm_cpu_init(void) struct dtpm_subsys_ops dtpm_cpu_ops = { .name = KBUILD_MODNAME, .init = dtpm_cpu_init, + .setup = dtpm_cpu_setup, }; -- GitLab From e446556173170e675a7a321e76ce5fa3587de724 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 28 Jan 2022 17:35:36 +0100 Subject: [PATCH 0241/1586] powercap/drivers/dtpm: Add dtpm devfreq with energy model support Currently the dtpm supports the CPUs via cpufreq and the energy model. This change provides the same for the device which supports devfreq. Each device supporting devfreq and having an energy model can be added to the hierarchy. The concept is the same as the cpufreq DTPM support: the QoS is used to aggregate the requests and the energy model gives the value of the instantaneous power consumption ponderated by the load of the device. 
Cc: Chanwoo Choi Cc: Lukasz Luba Cc: Kyungmin Park Cc: MyungJoo Ham Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20220128163537.212248-5-daniel.lezcano@linaro.org --- drivers/powercap/Kconfig | 7 ++ drivers/powercap/Makefile | 1 + drivers/powercap/dtpm_devfreq.c | 203 ++++++++++++++++++++++++++++++++ drivers/powercap/dtpm_subsys.h | 4 + 4 files changed, 215 insertions(+) create mode 100644 drivers/powercap/dtpm_devfreq.c diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig index b1ca339957e3d..515e3ceb3393a 100644 --- a/drivers/powercap/Kconfig +++ b/drivers/powercap/Kconfig @@ -57,4 +57,11 @@ config DTPM_CPU help This enables support for CPU power limitation based on energy model. + +config DTPM_DEVFREQ + bool "Add device power capping based on the energy model" + depends on DTPM && ENERGY_MODEL + help + This enables support for device power limitation based on + energy model. endif diff --git a/drivers/powercap/Makefile b/drivers/powercap/Makefile index fabcf388a8d39..494617cdad885 100644 --- a/drivers/powercap/Makefile +++ b/drivers/powercap/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_DTPM) += dtpm.o obj-$(CONFIG_DTPM_CPU) += dtpm_cpu.o +obj-$(CONFIG_DTPM_DEVFREQ) += dtpm_devfreq.o obj-$(CONFIG_POWERCAP) += powercap_sys.o obj-$(CONFIG_INTEL_RAPL_CORE) += intel_rapl_common.o obj-$(CONFIG_INTEL_RAPL) += intel_rapl_msr.o diff --git a/drivers/powercap/dtpm_devfreq.c b/drivers/powercap/dtpm_devfreq.c new file mode 100644 index 0000000000000..91276761a31d9 --- /dev/null +++ b/drivers/powercap/dtpm_devfreq.c @@ -0,0 +1,203 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2021 Linaro Limited + * + * Author: Daniel Lezcano + * + * The devfreq device combined with the energy model and the load can + * give an estimation of the power consumption as well as limiting the + * power. 
+ * + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +struct dtpm_devfreq { + struct dtpm dtpm; + struct dev_pm_qos_request qos_req; + struct devfreq *devfreq; +}; + +static struct dtpm_devfreq *to_dtpm_devfreq(struct dtpm *dtpm) +{ + return container_of(dtpm, struct dtpm_devfreq, dtpm); +} + +static int update_pd_power_uw(struct dtpm *dtpm) +{ + struct dtpm_devfreq *dtpm_devfreq = to_dtpm_devfreq(dtpm); + struct devfreq *devfreq = dtpm_devfreq->devfreq; + struct device *dev = devfreq->dev.parent; + struct em_perf_domain *pd = em_pd_get(dev); + + dtpm->power_min = pd->table[0].power; + dtpm->power_min *= MICROWATT_PER_MILLIWATT; + + dtpm->power_max = pd->table[pd->nr_perf_states - 1].power; + dtpm->power_max *= MICROWATT_PER_MILLIWATT; + + return 0; +} + +static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit) +{ + struct dtpm_devfreq *dtpm_devfreq = to_dtpm_devfreq(dtpm); + struct devfreq *devfreq = dtpm_devfreq->devfreq; + struct device *dev = devfreq->dev.parent; + struct em_perf_domain *pd = em_pd_get(dev); + unsigned long freq; + u64 power; + int i; + + for (i = 0; i < pd->nr_perf_states; i++) { + + power = pd->table[i].power * MICROWATT_PER_MILLIWATT; + if (power > power_limit) + break; + } + + freq = pd->table[i - 1].frequency; + + dev_pm_qos_update_request(&dtpm_devfreq->qos_req, freq); + + power_limit = pd->table[i - 1].power * MICROWATT_PER_MILLIWATT; + + return power_limit; +} + +static void _normalize_load(struct devfreq_dev_status *status) +{ + if (status->total_time > 0xfffff) { + status->total_time >>= 10; + status->busy_time >>= 10; + } + + status->busy_time <<= 10; + status->busy_time /= status->total_time ? : 1; + + status->busy_time = status->busy_time ? 
: 1; + status->total_time = 1024; +} + +static u64 get_pd_power_uw(struct dtpm *dtpm) +{ + struct dtpm_devfreq *dtpm_devfreq = to_dtpm_devfreq(dtpm); + struct devfreq *devfreq = dtpm_devfreq->devfreq; + struct device *dev = devfreq->dev.parent; + struct em_perf_domain *pd = em_pd_get(dev); + struct devfreq_dev_status status; + unsigned long freq; + u64 power; + int i; + + mutex_lock(&devfreq->lock); + status = devfreq->last_status; + mutex_unlock(&devfreq->lock); + + freq = DIV_ROUND_UP(status.current_frequency, HZ_PER_KHZ); + _normalize_load(&status); + + for (i = 0; i < pd->nr_perf_states; i++) { + + if (pd->table[i].frequency < freq) + continue; + + power = pd->table[i].power * MICROWATT_PER_MILLIWATT; + power *= status.busy_time; + power >>= 10; + + return power; + } + + return 0; +} + +static void pd_release(struct dtpm *dtpm) +{ + struct dtpm_devfreq *dtpm_devfreq = to_dtpm_devfreq(dtpm); + + if (dev_pm_qos_request_active(&dtpm_devfreq->qos_req)) + dev_pm_qos_remove_request(&dtpm_devfreq->qos_req); + + kfree(dtpm_devfreq); +} + +static struct dtpm_ops dtpm_ops = { + .set_power_uw = set_pd_power_limit, + .get_power_uw = get_pd_power_uw, + .update_power_uw = update_pd_power_uw, + .release = pd_release, +}; + +static int __dtpm_devfreq_setup(struct devfreq *devfreq, struct dtpm *parent) +{ + struct device *dev = devfreq->dev.parent; + struct dtpm_devfreq *dtpm_devfreq; + struct em_perf_domain *pd; + int ret = -ENOMEM; + + pd = em_pd_get(dev); + if (!pd) { + ret = dev_pm_opp_of_register_em(dev, NULL); + if (ret) { + pr_err("No energy model available for '%s'\n", dev_name(dev)); + return -EINVAL; + } + } + + dtpm_devfreq = kzalloc(sizeof(*dtpm_devfreq), GFP_KERNEL); + if (!dtpm_devfreq) + return -ENOMEM; + + dtpm_init(&dtpm_devfreq->dtpm, &dtpm_ops); + + dtpm_devfreq->devfreq = devfreq; + + ret = dtpm_register(dev_name(dev), &dtpm_devfreq->dtpm, parent); + if (ret) { + pr_err("Failed to register '%s': %d\n", dev_name(dev), ret); + kfree(dtpm_devfreq); + return 
ret; + } + + ret = dev_pm_qos_add_request(dev, &dtpm_devfreq->qos_req, + DEV_PM_QOS_MAX_FREQUENCY, + PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE); + if (ret) { + pr_err("Failed to add QoS request: %d\n", ret); + goto out_dtpm_unregister; + } + + dtpm_update_power(&dtpm_devfreq->dtpm); + + return 0; + +out_dtpm_unregister: + dtpm_unregister(&dtpm_devfreq->dtpm); + + return ret; +} + +static int dtpm_devfreq_setup(struct dtpm *dtpm, struct device_node *np) +{ + struct devfreq *devfreq; + + devfreq = devfreq_get_devfreq_by_node(np); + if (IS_ERR(devfreq)) + return 0; + + return __dtpm_devfreq_setup(devfreq, dtpm); +} + +struct dtpm_subsys_ops dtpm_devfreq_ops = { + .name = KBUILD_MODNAME, + .setup = dtpm_devfreq_setup, +}; diff --git a/drivers/powercap/dtpm_subsys.h b/drivers/powercap/dtpm_subsys.h index 2a3a2055f60ed..db1712938a969 100644 --- a/drivers/powercap/dtpm_subsys.h +++ b/drivers/powercap/dtpm_subsys.h @@ -8,11 +8,15 @@ #define ___DTPM_SUBSYS_H__ extern struct dtpm_subsys_ops dtpm_cpu_ops; +extern struct dtpm_subsys_ops dtpm_devfreq_ops; struct dtpm_subsys_ops *dtpm_subsys[] = { #ifdef CONFIG_DTPM_CPU &dtpm_cpu_ops, #endif +#ifdef CONFIG_DTPM_DEVFREQ + &dtpm_devfreq_ops, +#endif }; #endif -- GitLab From b9d6c47a2be8d273ecc063afda6e9fd66a35116d Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 28 Jan 2022 17:35:37 +0100 Subject: [PATCH 0242/1586] rockchip/soc/drivers: Add DTPM description for rk3399 The DTPM framework does support now the hierarchy description. The platform specific code can call the hierarchy creation function with an array of struct dtpm_node pointing to their parent. This patch provides a description of the big / Little CPUs and the GPU and tie them together under a virtual 'package' name. Only rk3399 is described now. The description could be extended in the future with the memory controller with devfreq. The description is always a module and it describes the soft dependencies. 
The userspace has to load the softdeps module in the right order. Signed-off-by: Daniel Lezcano Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20220128163537.212248-6-daniel.lezcano@linaro.org --- drivers/soc/rockchip/Kconfig | 8 +++++ drivers/soc/rockchip/Makefile | 1 + drivers/soc/rockchip/dtpm.c | 59 +++++++++++++++++++++++++++++++++++ 3 files changed, 68 insertions(+) create mode 100644 drivers/soc/rockchip/dtpm.c diff --git a/drivers/soc/rockchip/Kconfig b/drivers/soc/rockchip/Kconfig index 25eb2c1e31bb2..156ac0e0c8fe5 100644 --- a/drivers/soc/rockchip/Kconfig +++ b/drivers/soc/rockchip/Kconfig @@ -34,4 +34,12 @@ config ROCKCHIP_PM_DOMAINS If unsure, say N. +config ROCKCHIP_DTPM + tristate "Rockchip DTPM hierarchy" + depends on DTPM && m + help + Describe the hierarchy for the Dynamic Thermal Power + Management tree on this platform. That will create all the + power capping capable devices. + endif diff --git a/drivers/soc/rockchip/Makefile b/drivers/soc/rockchip/Makefile index 875032f7344ea..05f31a4e743c9 100644 --- a/drivers/soc/rockchip/Makefile +++ b/drivers/soc/rockchip/Makefile @@ -5,3 +5,4 @@ obj-$(CONFIG_ROCKCHIP_GRF) += grf.o obj-$(CONFIG_ROCKCHIP_IODOMAIN) += io-domain.o obj-$(CONFIG_ROCKCHIP_PM_DOMAINS) += pm_domains.o +obj-$(CONFIG_ROCKCHIP_DTPM) += dtpm.o diff --git a/drivers/soc/rockchip/dtpm.c b/drivers/soc/rockchip/dtpm.c new file mode 100644 index 0000000000000..ebebb748488b7 --- /dev/null +++ b/drivers/soc/rockchip/dtpm.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2021 Linaro Limited + * + * Author: Daniel Lezcano + * + * DTPM hierarchy description + */ +#include +#include +#include +#include + +static struct dtpm_node __initdata rk3399_hierarchy[] = { + [0]{ .name = "rk3399", + .type = DTPM_NODE_VIRTUAL }, + [1]{ .name = "package", + .type = DTPM_NODE_VIRTUAL, + .parent = &rk3399_hierarchy[0] }, + [2]{ .name = "/cpus/cpu@0", + .type = DTPM_NODE_DT, + .parent = &rk3399_hierarchy[1] }, + [3]{ 
.name = "/cpus/cpu@1", + .type = DTPM_NODE_DT, + .parent = &rk3399_hierarchy[1] }, + [4]{ .name = "/cpus/cpu@2", + .type = DTPM_NODE_DT, + .parent = &rk3399_hierarchy[1] }, + [5]{ .name = "/cpus/cpu@3", + .type = DTPM_NODE_DT, + .parent = &rk3399_hierarchy[1] }, + [6]{ .name = "/cpus/cpu@100", + .type = DTPM_NODE_DT, + .parent = &rk3399_hierarchy[1] }, + [7]{ .name = "/cpus/cpu@101", + .type = DTPM_NODE_DT, + .parent = &rk3399_hierarchy[1] }, + [8]{ .name = "/gpu@ff9a0000", + .type = DTPM_NODE_DT, + .parent = &rk3399_hierarchy[1] }, + [9]{ /* sentinel */ } +}; + +static struct of_device_id __initdata rockchip_dtpm_match_table[] = { + { .compatible = "rockchip,rk3399", .data = rk3399_hierarchy }, + {}, +}; + +static int __init rockchip_dtpm_init(void) +{ + return dtpm_create_hierarchy(rockchip_dtpm_match_table); +} +module_init(rockchip_dtpm_init); + +MODULE_SOFTDEP("pre: panfrost cpufreq-dt"); +MODULE_DESCRIPTION("Rockchip DTPM driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:dtpm"); +MODULE_AUTHOR("Daniel Lezcano Date: Wed, 26 Jan 2022 09:48:53 +0800 Subject: [PATCH 0243/1586] thermal: int340x: Check for NULL after calling kmemdup() As the potential failure of the allocation, kmemdup() may return NULL. Then, 'bin_attr_data_vault.private' will be NULL, but 'bin_attr_data_vault.size' is not 0, which is not consistent. Therefore, it is better to check the return value of kmemdup() to avoid the confusion. Fixes: 0ba13c763aac ("thermal/int340x_thermal: Export GDDV") Signed-off-by: Jiasheng Jiang [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. 
Wysocki --- drivers/thermal/intel/int340x_thermal/int3400_thermal.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c index 72acb1f618497..82de8bcd368fc 100644 --- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c +++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c @@ -464,6 +464,11 @@ static void int3400_setup_gddv(struct int3400_thermal_priv *priv) priv->data_vault = kmemdup(obj->package.elements[0].buffer.pointer, obj->package.elements[0].buffer.length, GFP_KERNEL); + if (!priv->data_vault) { + kfree(buffer.pointer); + return; + } + bin_attr_data_vault.private = priv->data_vault; bin_attr_data_vault.size = obj->package.elements[0].buffer.length; kfree(buffer.pointer); -- GitLab From ae26508651272695a3ab353f75ab9a8daf3da324 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Sun, 23 Jan 2022 20:45:06 +0800 Subject: [PATCH 0244/1586] cpufreq: Move to_gov_attr_set() to cpufreq.h So it can be reused by other codes. Signed-off-by: Kevin Hao Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cpufreq_governor_attr_set.c | 5 ----- include/linux/cpufreq.h | 5 +++++ 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor_attr_set.c b/drivers/cpufreq/cpufreq_governor_attr_set.c index a6f365b9cc1ad..771770ea0ed0b 100644 --- a/drivers/cpufreq/cpufreq_governor_attr_set.c +++ b/drivers/cpufreq/cpufreq_governor_attr_set.c @@ -8,11 +8,6 @@ #include "cpufreq_governor.h" -static inline struct gov_attr_set *to_gov_attr_set(struct kobject *kobj) -{ - return container_of(kobj, struct gov_attr_set, kobj); -} - static inline struct governor_attr *to_gov_attr(struct attribute *attr) { return container_of(attr, struct governor_attr, attr); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 1ab29e61b078e..f0dfc0b260ecb 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -658,6 +658,11 @@ struct gov_attr_set { /* sysfs ops for cpufreq governors */ extern const struct sysfs_ops governor_sysfs_ops; +static inline struct gov_attr_set *to_gov_attr_set(struct kobject *kobj) +{ + return container_of(kobj, struct gov_attr_set, kobj); +} + void gov_attr_set_init(struct gov_attr_set *attr_set, struct list_head *list_node); void gov_attr_set_get(struct gov_attr_set *attr_set, struct list_head *list_node); unsigned int gov_attr_set_put(struct gov_attr_set *attr_set, struct list_head *list_node); -- GitLab From 53725c4cbd4567423ff6143c5d10300e53ecf52a Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Sun, 23 Jan 2022 20:45:07 +0800 Subject: [PATCH 0245/1586] cpufreq: schedutil: Use to_gov_attr_set() to get the gov_attr_set The to_gov_attr_set() has been moved to the cpufreq.h, so use it to get the gov_attr_set. Signed-off-by: Kevin Hao Signed-off-by: Rafael J. 
Wysocki --- kernel/sched/cpufreq_schedutil.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 26778884d9ab1..cffcd08f4ec8f 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -539,7 +539,7 @@ ATTRIBUTE_GROUPS(sugov); static void sugov_tunables_free(struct kobject *kobj) { - struct gov_attr_set *attr_set = container_of(kobj, struct gov_attr_set, kobj); + struct gov_attr_set *attr_set = to_gov_attr_set(kobj); kfree(to_sugov_tunables(attr_set)); } -- GitLab From a11cda8e2f184e35e42bbb96a27272b3c5e801d2 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sun, 23 Jan 2022 10:38:34 -0800 Subject: [PATCH 0246/1586] thermal: intel_powerclamp: don't use bitmap_weight() in end_power_clamp() Don't call bitmap_weight() if the following code can get by without it. Signed-off-by: Yury Norov Signed-off-by: Rafael J. Wysocki --- drivers/thermal/intel/intel_powerclamp.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index 14256421d98c2..c841ab37e7c6d 100644 --- a/drivers/thermal/intel/intel_powerclamp.c +++ b/drivers/thermal/intel/intel_powerclamp.c @@ -556,12 +556,9 @@ static void end_power_clamp(void) * stop faster. */ clamping = false; - if (bitmap_weight(cpu_clamping_mask, num_possible_cpus())) { - for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) { - pr_debug("clamping worker for cpu %d alive, destroy\n", - i); - stop_power_clamp_worker(i); - } + for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) { + pr_debug("clamping worker for cpu %d alive, destroy\n", i); + stop_power_clamp_worker(i); } } -- GitLab From 7ddf5e37631ac7a96920f0f8aa3c8c4c289aaa25 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Wed, 26 Jan 2022 20:43:59 +0100 Subject: [PATCH 0247/1586] cpufreq: longhaul: Replace acpi_bus_get_device() Replace acpi_bus_get_device() that is going to be dropped with acpi_fetch_acpi_dev(). No intentional functional impact. Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/longhaul.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c index c538a153ee828..3e000e1a75c6c 100644 --- a/drivers/cpufreq/longhaul.c +++ b/drivers/cpufreq/longhaul.c @@ -668,9 +668,9 @@ static acpi_status longhaul_walk_callback(acpi_handle obj_handle, u32 nesting_level, void *context, void **return_value) { - struct acpi_device *d; + struct acpi_device *d = acpi_fetch_acpi_dev(obj_handle); - if (acpi_bus_get_device(obj_handle, &d)) + if (!d) return 0; *return_value = acpi_driver_data(d); -- GitLab From 098c874e20be2a4cee3021aa9b3485ed5e1f4d5b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 26 Jan 2022 20:45:17 +0100 Subject: [PATCH 0248/1586] thermal: Replace acpi_bus_get_device() Replace acpi_bus_get_device() that is going to be dropped with acpi_fetch_acpi_dev(). No intentional functional impact. Signed-off-by: Rafael J. 
Wysocki --- .../intel/int340x_thermal/acpi_thermal_rel.c | 23 ++++++------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c b/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c index e90690a234c40..01b80331eab66 100644 --- a/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c +++ b/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c @@ -72,7 +72,6 @@ int acpi_parse_trt(acpi_handle handle, int *trt_count, struct trt **trtp, int i; int nr_bad_entries = 0; struct trt *trts; - struct acpi_device *adev; union acpi_object *p; struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; struct acpi_buffer element = { 0, NULL }; @@ -112,12 +111,10 @@ int acpi_parse_trt(acpi_handle handle, int *trt_count, struct trt **trtp, if (!create_dev) continue; - result = acpi_bus_get_device(trt->source, &adev); - if (result) + if (!acpi_fetch_acpi_dev(trt->source)) pr_warn("Failed to get source ACPI device\n"); - result = acpi_bus_get_device(trt->target, &adev); - if (result) + if (!acpi_fetch_acpi_dev(trt->target)) pr_warn("Failed to get target ACPI device\n"); } @@ -149,7 +146,6 @@ int acpi_parse_art(acpi_handle handle, int *art_count, struct art **artp, int i; int nr_bad_entries = 0; struct art *arts; - struct acpi_device *adev; union acpi_object *p; struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; struct acpi_buffer element = { 0, NULL }; @@ -191,16 +187,11 @@ int acpi_parse_art(acpi_handle handle, int *art_count, struct art **artp, if (!create_dev) continue; - if (art->source) { - result = acpi_bus_get_device(art->source, &adev); - if (result) - pr_warn("Failed to get source ACPI device\n"); - } - if (art->target) { - result = acpi_bus_get_device(art->target, &adev); - if (result) - pr_warn("Failed to get target ACPI device\n"); - } + if (!acpi_fetch_acpi_dev(art->source)) + pr_warn("Failed to get source ACPI device\n"); + + if (!acpi_fetch_acpi_dev(art->target)) + 
pr_warn("Failed to get target ACPI device\n"); } *artp = arts; -- GitLab From ed945296bb90ce79a7ff331885dd37eafa38b551 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 26 Jan 2022 20:51:27 +0100 Subject: [PATCH 0249/1586] PNP: Replace acpi_bus_get_device() Replace acpi_bus_get_device() that is going to be dropped with acpi_fetch_acpi_dev(). No intentional functional impact. Signed-off-by: Rafael J. Wysocki --- drivers/pnp/pnpacpi/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c index afaf30a3622c0..38928ff7472b0 100644 --- a/drivers/pnp/pnpacpi/core.c +++ b/drivers/pnp/pnpacpi/core.c @@ -287,9 +287,9 @@ static acpi_status __init pnpacpi_add_device_handler(acpi_handle handle, u32 lvl, void *context, void **rv) { - struct acpi_device *device; + struct acpi_device *device = acpi_fetch_acpi_dev(handle); - if (acpi_bus_get_device(handle, &device)) + if (!device) return AE_CTRL_DEPTH; if (acpi_is_pnp_device(device)) pnpacpi_add_device(device); -- GitLab From 70f4169ab421b277caf7429e84f468d8c47aa00a Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Wed, 2 Feb 2022 13:55:29 +0100 Subject: [PATCH 0250/1586] selinux: parse contexts for mount options early Commit b8b87fd954b4 ("selinux: Fix selinux_sb_mnt_opts_compat()") started to parse mount options into SIDs in selinux_add_opt() if policy has already been loaded. Since it's extremely unlikely that anyone would depend on the ability to set SELinux contexts on fs_context before loading the policy and then mounting that context afterwards, simplify the logic by always parsing the options early. Note that the multi-step mounting is only possible with the new fscontext mount API and wasn't possible before its introduction. 
Signed-off-by: Ondrej Mosnacek Signed-off-by: Paul Moore --- security/selinux/hooks.c | 202 ++++++++++----------------------------- 1 file changed, 53 insertions(+), 149 deletions(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index b24a1aeeedd43..ab32303e66181 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -340,7 +340,6 @@ static void inode_free_security(struct inode *inode) } struct selinux_mnt_opts { - const char *fscontext, *context, *rootcontext, *defcontext; u32 fscontext_sid; u32 context_sid; u32 rootcontext_sid; @@ -349,12 +348,7 @@ struct selinux_mnt_opts { static void selinux_free_mnt_opts(void *mnt_opts) { - struct selinux_mnt_opts *opts = mnt_opts; - kfree(opts->fscontext); - kfree(opts->context); - kfree(opts->rootcontext); - kfree(opts->defcontext); - kfree(opts); + kfree(mnt_opts); } enum { @@ -601,17 +595,6 @@ static int bad_option(struct superblock_security_struct *sbsec, char flag, return 0; } -static int parse_sid(struct super_block *sb, const char *s, u32 *sid) -{ - int rc = security_context_str_to_sid(&selinux_state, s, - sid, GFP_KERNEL); - if (rc) - pr_warn("SELinux: security_context_str_to_sid" - "(%s) failed for (dev %s, type %s) errno=%d\n", - s, sb ? sb->s_id : "?", sb ? sb->s_type->name : "?", rc); - return rc; -} - /* * Allow filesystems with binary mount data to explicitly set mount point * labeling information. @@ -674,49 +657,29 @@ static int selinux_set_mnt_opts(struct super_block *sb, * than once with different security options. 
*/ if (opts) { - if (opts->fscontext) { - if (opts->fscontext_sid == SECSID_NULL) { - rc = parse_sid(sb, opts->fscontext, &fscontext_sid); - if (rc) - goto out; - } else - fscontext_sid = opts->fscontext_sid; + if (opts->fscontext_sid) { + fscontext_sid = opts->fscontext_sid; if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, fscontext_sid)) goto out_double_mount; sbsec->flags |= FSCONTEXT_MNT; } - if (opts->context) { - if (opts->context_sid == SECSID_NULL) { - rc = parse_sid(sb, opts->context, &context_sid); - if (rc) - goto out; - } else - context_sid = opts->context_sid; + if (opts->context_sid) { + context_sid = opts->context_sid; if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, context_sid)) goto out_double_mount; sbsec->flags |= CONTEXT_MNT; } - if (opts->rootcontext) { - if (opts->rootcontext_sid == SECSID_NULL) { - rc = parse_sid(sb, opts->rootcontext, &rootcontext_sid); - if (rc) - goto out; - } else - rootcontext_sid = opts->rootcontext_sid; + if (opts->rootcontext_sid) { + rootcontext_sid = opts->rootcontext_sid; if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, rootcontext_sid)) goto out_double_mount; sbsec->flags |= ROOTCONTEXT_MNT; } - if (opts->defcontext) { - if (opts->defcontext_sid == SECSID_NULL) { - rc = parse_sid(sb, opts->defcontext, &defcontext_sid); - if (rc) - goto out; - } else - defcontext_sid = opts->defcontext_sid; + if (opts->defcontext_sid) { + defcontext_sid = opts->defcontext_sid; if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, defcontext_sid)) goto out_double_mount; @@ -986,6 +949,8 @@ static int selinux_add_opt(int token, const char *s, void **mnt_opts) { struct selinux_mnt_opts *opts = *mnt_opts; bool is_alloc_opts = false; + u32 *dst_sid; + int rc; if (token == Opt_seclabel) /* eaten and completely ignored */ @@ -993,6 +958,11 @@ static int selinux_add_opt(int token, const char *s, void **mnt_opts) if (!s) return -ENOMEM; + if (!selinux_initialized(&selinux_state)) { + pr_warn("SELinux: Unable to set superblock 
options before the security server is initialized\n"); + return -EINVAL; + } + if (!opts) { opts = kzalloc(sizeof(*opts), GFP_KERNEL); if (!opts) @@ -1003,36 +973,34 @@ static int selinux_add_opt(int token, const char *s, void **mnt_opts) switch (token) { case Opt_context: - if (opts->context || opts->defcontext) + if (opts->context_sid || opts->defcontext_sid) goto err; - opts->context = s; - if (selinux_initialized(&selinux_state)) - parse_sid(NULL, s, &opts->context_sid); + dst_sid = &opts->context_sid; break; case Opt_fscontext: - if (opts->fscontext) + if (opts->fscontext_sid) goto err; - opts->fscontext = s; - if (selinux_initialized(&selinux_state)) - parse_sid(NULL, s, &opts->fscontext_sid); + dst_sid = &opts->fscontext_sid; break; case Opt_rootcontext: - if (opts->rootcontext) + if (opts->rootcontext_sid) goto err; - opts->rootcontext = s; - if (selinux_initialized(&selinux_state)) - parse_sid(NULL, s, &opts->rootcontext_sid); + dst_sid = &opts->rootcontext_sid; break; case Opt_defcontext: - if (opts->context || opts->defcontext) + if (opts->context_sid || opts->defcontext_sid) goto err; - opts->defcontext = s; - if (selinux_initialized(&selinux_state)) - parse_sid(NULL, s, &opts->defcontext_sid); + dst_sid = &opts->defcontext_sid; break; + default: + WARN_ON(1); + return -EINVAL; } - - return 0; + rc = security_context_str_to_sid(&selinux_state, s, dst_sid, GFP_KERNEL); + if (rc) + pr_warn("SELinux: security_context_str_to_sid (%s) failed with errno=%d\n", + s, rc); + return rc; err: if (is_alloc_opts) { @@ -2681,37 +2649,27 @@ static int selinux_sb_mnt_opts_compat(struct super_block *sb, void *mnt_opts) if (!opts) return (sbsec->flags & SE_MNTMASK) ? 
1 : 0; - if (opts->fscontext) { - if (opts->fscontext_sid == SECSID_NULL) - return 1; - else if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, - opts->fscontext_sid)) + if (opts->fscontext_sid) { + if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, + opts->fscontext_sid)) return 1; } - if (opts->context) { - if (opts->context_sid == SECSID_NULL) - return 1; - else if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, - opts->context_sid)) + if (opts->context_sid) { + if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, + opts->context_sid)) return 1; } - if (opts->rootcontext) { - if (opts->rootcontext_sid == SECSID_NULL) - return 1; - else { - struct inode_security_struct *root_isec; + if (opts->rootcontext_sid) { + struct inode_security_struct *root_isec; - root_isec = backing_inode_security(sb->s_root); - if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, - opts->rootcontext_sid)) - return 1; - } - } - if (opts->defcontext) { - if (opts->defcontext_sid == SECSID_NULL) + root_isec = backing_inode_security(sb->s_root); + if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, + opts->rootcontext_sid)) return 1; - else if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, - opts->defcontext_sid)) + } + if (opts->defcontext_sid) { + if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, + opts->defcontext_sid)) return 1; } return 0; @@ -2721,7 +2679,6 @@ static int selinux_sb_remount(struct super_block *sb, void *mnt_opts) { struct selinux_mnt_opts *opts = mnt_opts; struct superblock_security_struct *sbsec = selinux_superblock(sb); - int rc; if (!(sbsec->flags & SE_SBINITIALIZED)) return 0; @@ -2729,47 +2686,24 @@ static int selinux_sb_remount(struct super_block *sb, void *mnt_opts) if (!opts) return 0; - if (opts->fscontext) { - if (opts->fscontext_sid == SECSID_NULL) { - rc = parse_sid(sb, opts->fscontext, - &opts->fscontext_sid); - if (rc) - return rc; - } + if (opts->fscontext_sid) { if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, opts->fscontext_sid)) goto 
out_bad_option; } - if (opts->context) { - if (opts->context_sid == SECSID_NULL) { - rc = parse_sid(sb, opts->context, &opts->context_sid); - if (rc) - return rc; - } + if (opts->context_sid) { if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, opts->context_sid)) goto out_bad_option; } - if (opts->rootcontext) { + if (opts->rootcontext_sid) { struct inode_security_struct *root_isec; root_isec = backing_inode_security(sb->s_root); - if (opts->rootcontext_sid == SECSID_NULL) { - rc = parse_sid(sb, opts->rootcontext, - &opts->rootcontext_sid); - if (rc) - return rc; - } if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, opts->rootcontext_sid)) goto out_bad_option; } - if (opts->defcontext) { - if (opts->defcontext_sid == SECSID_NULL) { - rc = parse_sid(sb, opts->defcontext, - &opts->defcontext_sid); - if (rc) - return rc; - } + if (opts->defcontext_sid) { if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, opts->defcontext_sid)) goto out_bad_option; @@ -2838,42 +2772,12 @@ static int selinux_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc) { const struct selinux_mnt_opts *src = src_fc->security; - struct selinux_mnt_opts *opts; if (!src) return 0; - fc->security = kzalloc(sizeof(struct selinux_mnt_opts), GFP_KERNEL); - if (!fc->security) - return -ENOMEM; - - opts = fc->security; - - if (src->fscontext) { - opts->fscontext = kstrdup(src->fscontext, GFP_KERNEL); - if (!opts->fscontext) - return -ENOMEM; - } - if (src->context) { - opts->context = kstrdup(src->context, GFP_KERNEL); - if (!opts->context) - return -ENOMEM; - } - if (src->rootcontext) { - opts->rootcontext = kstrdup(src->rootcontext, GFP_KERNEL); - if (!opts->rootcontext) - return -ENOMEM; - } - if (src->defcontext) { - opts->defcontext = kstrdup(src->defcontext, GFP_KERNEL); - if (!opts->defcontext) - return -ENOMEM; - } - opts->fscontext_sid = src->fscontext_sid; - opts->context_sid = src->context_sid; - opts->rootcontext_sid = src->rootcontext_sid; - opts->defcontext_sid = 
src->defcontext_sid; - return 0; + fc->security = kmemdup(src, sizeof(*src), GFP_KERNEL); + return fc->security ? 0 : -ENOMEM; } static const struct fs_parameter_spec selinux_fs_parameters[] = { -- GitLab From 783dedf41b79ac7a3a68b51cf6f88cbfd6dc3292 Mon Sep 17 00:00:00 2001 From: Robert Kiraly Date: Thu, 27 Jan 2022 02:15:23 -0800 Subject: [PATCH 0251/1586] ACPI: tables: Add CEDT signature to the list of known tables Add ACPI_SIG_CEDT to table_sigs[] in "drivers/acpi/tables.c". Signed-off-by: Robert Kiraly [ rjw: Rebase, new subject and changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/tables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index 0741a4933f622..2d7ed7126faac 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -545,7 +545,7 @@ static const char table_sigs[][ACPI_NAMESEG_SIZE] __initconst = { ACPI_SIG_WDDT, ACPI_SIG_WDRT, ACPI_SIG_DSDT, ACPI_SIG_FADT, ACPI_SIG_PSDT, ACPI_SIG_RSDT, ACPI_SIG_XSDT, ACPI_SIG_SSDT, ACPI_SIG_IORT, ACPI_SIG_NFIT, ACPI_SIG_HMAT, ACPI_SIG_PPTT, - ACPI_SIG_NHLT, ACPI_SIG_AEST }; + ACPI_SIG_NHLT, ACPI_SIG_AEST, ACPI_SIG_CEDT }; #define ACPI_HEADER_SIZE sizeof(struct acpi_table_header) -- GitLab From 26a03981318d2e0f12456a1041fe3bf8af5416cf Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Mon, 24 Jan 2022 21:29:46 +0100 Subject: [PATCH 0252/1586] hwrng: core - explicit ordering of initcalls hw-random device drivers depend on the hw-random core being initialized. Make this ordering explicit, also for the case these drivers are built-in. As the core itself depends on misc_register() which is set up at subsys_initcall time, advance the initialization of the core (only) to the fs_initcall() level. Cc: Matt Mackall Cc: Herbert Xu Cc: Jason A. 
Donenfeld Signed-off-by: Dominik Brodowski Signed-off-by: Herbert Xu --- drivers/char/hw_random/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index a3db27916256d..e860e044b19e3 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -638,7 +638,7 @@ static void __exit hwrng_modexit(void) unregister_miscdev(); } -module_init(hwrng_modinit); +fs_initcall(hwrng_modinit); /* depends on misc_register() */ module_exit(hwrng_modexit); MODULE_DESCRIPTION("H/W Random Number Generator (RNG) driver"); -- GitLab From 6ff6304497c900a0284e3aa89ad86f0ed01f86d8 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Mon, 24 Jan 2022 21:29:47 +0100 Subject: [PATCH 0253/1586] hwrng: core - read() callback must be called for size of 32 or more bytes According to <linux/hw_random.h>, the @max parameter of the ->read callback "is a multiple of 4 and >= 32 bytes". That promise was not kept by add_early_randomness(), which only asked for 16 bytes. As rng_buffer_size() is at least 32, we can simply ask for 32 bytes. Cc: Matt Mackall Cc: Herbert Xu Cc: Jason A. 
Donenfeld Signed-off-by: Dominik Brodowski Signed-off-by: Herbert Xu --- drivers/char/hw_random/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index e860e044b19e3..c2d260b5dd921 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -64,10 +64,9 @@ static size_t rng_buffer_size(void) static void add_early_randomness(struct hwrng *rng) { int bytes_read; - size_t size = min_t(size_t, 16, rng_buffer_size()); mutex_lock(&reading_mutex); - bytes_read = rng_get_data(rng, rng_buffer, size, 0); + bytes_read = rng_get_data(rng, rng_buffer, 32, 0); mutex_unlock(&reading_mutex); if (bytes_read > 0) add_device_randomness(rng_buffer, bytes_read); -- GitLab From c05ac449442dd214e262057a3a49c368e224c384 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Mon, 24 Jan 2022 21:29:48 +0100 Subject: [PATCH 0254/1586] hwrng: core - use rng_fillbuf in add_early_randomness() Using rng_buffer in add_early_randomness() may race with rng_dev_read(). Use rng_fillbuf instead, as it is otherwise only used within the kernel by hwrng_fillfn() and therefore never exposed to userspace. Cc: Matt Mackall Cc: Herbert Xu Cc: Jason A. 
Donenfeld Signed-off-by: Dominik Brodowski Signed-off-by: Herbert Xu --- drivers/char/hw_random/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index c2d260b5dd921..89891ac87af00 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -66,10 +66,10 @@ static void add_early_randomness(struct hwrng *rng) int bytes_read; mutex_lock(&reading_mutex); - bytes_read = rng_get_data(rng, rng_buffer, 32, 0); + bytes_read = rng_get_data(rng, rng_fillbuf, 32, 0); mutex_unlock(&reading_mutex); if (bytes_read > 0) - add_device_randomness(rng_buffer, bytes_read); + add_device_randomness(rng_fillbuf, bytes_read); } static inline void cleanup_rng(struct kref *kref) -- GitLab From f41aa47c8bb40bcbad51fc368fe15f96406897cb Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Mon, 24 Jan 2022 21:29:49 +0100 Subject: [PATCH 0255/1586] hwrng: core - only set cur_rng_set_by_user if it is working In case the user-specified rng device is not working, it is not used; therefore cur_rng_set_by_user must not be set to 1. Cc: Matt Mackall Cc: Herbert Xu Cc: Jason A. 
Donenfeld Signed-off-by: Dominik Brodowski Signed-off-by: Herbert Xu --- drivers/char/hw_random/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 89891ac87af00..9405fcdace384 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -335,8 +335,9 @@ static ssize_t rng_current_store(struct device *dev, } else { list_for_each_entry(rng, &rng_list, list) { if (sysfs_streq(rng->name, buf)) { - cur_rng_set_by_user = 1; err = set_current_rng(rng); + if (!err) + cur_rng_set_by_user = 1; break; } } -- GitLab From f4f7c153a61782f3fb259d0f39aab91444e555d9 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Mon, 24 Jan 2022 21:29:50 +0100 Subject: [PATCH 0256/1586] hwrng: core - break out of hwrng_fillfn if current rng is not trusted For two reasons, current_quality may become zero within the rngd kernel thread: (1) The user lowers current_quality to 0 by writing to the sysfs module parameter file (note that increasing the quality from zero is without effect at the moment), or (2) there are two or more hwrng devices registered, and those which provide quality>0 are unregistered, but one with quality==0 remains. If current_quality is 0, the randomness is not trusted and cannot help to increase the entropy count. That will lead to continuous calls to the hwrngd thread and continuous stirring of the input pool with untrusted bits. Cc: Matt Mackall Cc: Herbert Xu Cc: Jason A. 
Donenfeld Signed-off-by: Dominik Brodowski Signed-off-by: Herbert Xu --- drivers/char/hw_random/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 9405fcdace384..bc9f95cbac921 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -429,6 +429,9 @@ static int hwrng_fillfn(void *unused) while (!kthread_should_stop()) { struct hwrng *rng; + if (!current_quality) + break; + rng = get_current_rng(); if (IS_ERR(rng) || !rng) break; -- GitLab From bd9305b0cb69bfe98885a63a9e6231ae92e822e2 Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Tue, 25 Jan 2022 23:56:22 +0530 Subject: [PATCH 0257/1586] crypto: octeontx2 - CN10K CPT to RNM workaround When software sets CPT_AF_CTL[RNM_REQ_EN]=1 and RNM is not producing entropy (i.e., RNM_ENTROPY_STATUS[NORMAL_CNT] < 0x40), the first cycle of the response may be lost due to a conditional clocking issue. Due to this, the subsequent random number stream will be corrupted. So, this patch adds support to ensure RNM_ENTROPY_STATUS[NORMAL_CNT] = 0x40 before writing CPT_AF_CTL[RNM_REQ_EN] = 1, as a workaround. 
Signed-off-by: Srujana Challa Signed-off-by: Shijith Thotton Signed-off-by: Herbert Xu --- .../marvell/octeontx2/otx2_cptpf_ucode.c | 43 ++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c index 4c8ebdf671ca8..addc760501a95 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c @@ -1076,6 +1076,39 @@ static void delete_engine_grps(struct pci_dev *pdev, delete_engine_group(&pdev->dev, &eng_grps->grp[i]); } +#define PCI_DEVID_CN10K_RNM 0xA098 +#define RNM_ENTROPY_STATUS 0x8 + +static void rnm_to_cpt_errata_fixup(struct device *dev) +{ + struct pci_dev *pdev; + void __iomem *base; + int timeout = 5000; + + pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CN10K_RNM, NULL); + if (!pdev) + return; + + base = pci_ioremap_bar(pdev, 0); + if (!base) + goto put_pdev; + + while ((readq(base + RNM_ENTROPY_STATUS) & 0x7F) != 0x40) { + cpu_relax(); + udelay(1); + timeout--; + if (!timeout) { + dev_warn(dev, "RNM is not producing entropy\n"); + break; + } + } + + iounmap(base); + +put_pdev: + pci_dev_put(pdev); +} + int otx2_cpt_get_eng_grp(struct otx2_cpt_eng_grps *eng_grps, int eng_type) { @@ -1189,9 +1222,17 @@ int otx2_cpt_create_eng_grps(struct otx2_cptpf_dev *cptpf, if (is_dev_otx2(pdev)) goto unlock; + + /* + * Ensure RNM_ENTROPY_STATUS[NORMAL_CNT] = 0x40 before writing + * CPT_AF_CTL[RNM_REQ_EN] = 1 as a workaround for HW errata. + */ + rnm_to_cpt_errata_fixup(&pdev->dev); + /* * Configure engine group mask to allow context prefetching - * for the groups. + * for the groups and enable random number request, to enable + * CPT to request random numbers from RNM. 
*/ otx2_cpt_write_af_reg(&cptpf->afpf_mbox, pdev, CPT_AF_CTL, OTX2_CPT_ALL_ENG_GRPS_MASK << 3 | BIT_ULL(16), -- GitLab From 9eef6e972a32bc2454a22e8f0e8d4e7f55ff6613 Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Tue, 25 Jan 2022 23:56:23 +0530 Subject: [PATCH 0258/1586] crypto: octeontx2 - disable DMA black hole on an DMA fault When CPT_AF_DIAG[FLT_DIS] = 0 and a CPT engine access to LLC/DRAM encounters a fault/poison, a rare case may result in unpredictable data being delivered to a CPT engine. So, this patch adds code to set FLT_DIS as a workaround. Signed-off-by: Srujana Challa Signed-off-by: Shijith Thotton Signed-off-by: Herbert Xu --- drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c | 13 +++++++++++++ drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c | 1 + 2 files changed, 14 insertions(+) diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c index addc760501a95..ede84abf2988c 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c @@ -1144,6 +1144,7 @@ int otx2_cpt_create_eng_grps(struct otx2_cptpf_dev *cptpf, struct otx2_cpt_engines engs[OTX2_CPT_MAX_ETYPES_PER_GRP] = { {0} }; struct pci_dev *pdev = cptpf->pdev; struct fw_info_t fw_info; + u64 reg_val; int ret = 0; mutex_lock(&eng_grps->lock); @@ -1244,6 +1245,18 @@ int otx2_cpt_create_eng_grps(struct otx2_cptpf_dev *cptpf, */ otx2_cpt_write_af_reg(&cptpf->afpf_mbox, pdev, CPT_AF_CTX_FLUSH_TIMER, CTX_FLUSH_TIMER_CNT, BLKADDR_CPT0); + + /* + * Set CPT_AF_DIAG[FLT_DIS], as a workaround for HW errata, when + * CPT_AF_DIAG[FLT_DIS] = 0 and a CPT engine access to LLC/DRAM + * encounters a fault/poison, a rare case may result in + * unpredictable data being delivered to a CPT engine. 
+ */ + otx2_cpt_read_af_reg(&cptpf->afpf_mbox, pdev, CPT_AF_DIAG, &reg_val, + BLKADDR_CPT0); + otx2_cpt_write_af_reg(&cptpf->afpf_mbox, pdev, CPT_AF_DIAG, + reg_val | BIT_ULL(24), BLKADDR_CPT0); + mutex_unlock(&eng_grps->lock); return 0; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c index a73a8017e0ee9..a79201a9a6f03 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c @@ -605,6 +605,7 @@ static bool is_valid_offset(struct rvu *rvu, struct cpt_rd_wr_reg_msg *req) } else if (!(req->hdr.pcifunc & RVU_PFVF_FUNC_MASK)) { /* Registers that can be accessed from PF */ switch (offset) { + case CPT_AF_DIAG: case CPT_AF_CTL: case CPT_AF_PF_FUNC: case CPT_AF_BLK_RST: -- GitLab From e236ab0d43622a8a5a8ff06630fd467b444a9db9 Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Tue, 25 Jan 2022 23:56:24 +0530 Subject: [PATCH 0259/1586] crypto: octeontx2 - increase CPT HW instruction queue length LDWB is getting incorrectly used in HW when CPT_AF_LF()_PTR_CTL[IQB_LDWB]=1 and CPT instruction queue has less than 320 free entries. So, increase HW instruction queue size by 320 and give 320 entries less for SW/NIX RX as a SW workaround. Signed-off-by: Srujana Challa Signed-off-by: Shijith Thotton Signed-off-by: Herbert Xu --- drivers/crypto/marvell/octeontx2/otx2_cptlf.h | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptlf.h b/drivers/crypto/marvell/octeontx2/otx2_cptlf.h index b691b6c1d5c45..4fcaf61a70e36 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptlf.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cptlf.h @@ -26,12 +26,22 @@ */ #define OTX2_CPT_INST_QLEN_MSGS ((OTX2_CPT_SIZE_DIV40 - 1) * 40) +/* + * LDWB is getting incorrectly used when IQB_LDWB = 1 and CPT instruction + * queue has less than 320 free entries. 
So, increase HW instruction queue + * size by 320 and give 320 entries less for SW/NIX RX as a workaround. + */ +#define OTX2_CPT_INST_QLEN_EXTRA_BYTES (320 * OTX2_CPT_INST_SIZE) +#define OTX2_CPT_EXTRA_SIZE_DIV40 (320/40) + /* CPT instruction queue length in bytes */ -#define OTX2_CPT_INST_QLEN_BYTES (OTX2_CPT_SIZE_DIV40 * 40 * \ - OTX2_CPT_INST_SIZE) +#define OTX2_CPT_INST_QLEN_BYTES \ + ((OTX2_CPT_SIZE_DIV40 * 40 * OTX2_CPT_INST_SIZE) + \ + OTX2_CPT_INST_QLEN_EXTRA_BYTES) /* CPT instruction group queue length in bytes */ -#define OTX2_CPT_INST_GRP_QLEN_BYTES (OTX2_CPT_SIZE_DIV40 * 16) +#define OTX2_CPT_INST_GRP_QLEN_BYTES \ + ((OTX2_CPT_SIZE_DIV40 + OTX2_CPT_EXTRA_SIZE_DIV40) * 16) /* CPT FC length in bytes */ #define OTX2_CPT_Q_FC_LEN 128 @@ -179,7 +189,8 @@ static inline void otx2_cptlf_do_set_iqueue_size(struct otx2_cptlf_info *lf) { union otx2_cptx_lf_q_size lf_q_size = { .u = 0x0 }; - lf_q_size.s.size_div40 = OTX2_CPT_SIZE_DIV40; + lf_q_size.s.size_div40 = OTX2_CPT_SIZE_DIV40 + + OTX2_CPT_EXTRA_SIZE_DIV40; otx2_cpt_write64(lf->lfs->reg_base, BLKADDR_CPT0, lf->slot, OTX2_CPT_LF_Q_SIZE, lf_q_size.u); } -- GitLab From 8daa399edeed4cfa792ccea12beda50d445ab6a0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 27 Jan 2022 10:52:11 +0100 Subject: [PATCH 0260/1586] crypto: arm64/aes-neon-ctr - improve handling of single tail block Instead of falling back to C code to do a memcpy of the output of the last block, handle this in the asm code directly if possible, which is the case if the entire input is longer than 16 bytes. 
Cc: Nathan Huckleberry Cc: Eric Biggers Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-glue.c | 21 +++++++-------------- arch/arm64/crypto/aes-modes.S | 18 +++++++++++++----- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 30b7cc6a70790..7d66f8babb1d6 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -24,7 +24,6 @@ #ifdef USE_V8_CRYPTO_EXTENSIONS #define MODE "ce" #define PRIO 300 -#define STRIDE 5 #define aes_expandkey ce_aes_expandkey #define aes_ecb_encrypt ce_aes_ecb_encrypt #define aes_ecb_decrypt ce_aes_ecb_decrypt @@ -42,7 +41,6 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions"); #else #define MODE "neon" #define PRIO 200 -#define STRIDE 4 #define aes_ecb_encrypt neon_aes_ecb_encrypt #define aes_ecb_decrypt neon_aes_ecb_decrypt #define aes_cbc_encrypt neon_aes_cbc_encrypt @@ -89,7 +87,7 @@ asmlinkage void aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, int bytes, u8 const iv[]); asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[], - int rounds, int bytes, u8 ctr[], u8 finalbuf[]); + int rounds, int bytes, u8 ctr[]); asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds, int bytes, u32 const rk2[], u8 iv[], @@ -458,26 +456,21 @@ static int __maybe_unused ctr_encrypt(struct skcipher_request *req) unsigned int nbytes = walk.nbytes; u8 *dst = walk.dst.virt.addr; u8 buf[AES_BLOCK_SIZE]; - unsigned int tail; if (unlikely(nbytes < AES_BLOCK_SIZE)) - src = memcpy(buf, src, nbytes); + src = dst = memcpy(buf + sizeof(buf) - nbytes, + src, nbytes); else if (nbytes < walk.total) nbytes &= ~(AES_BLOCK_SIZE - 1); kernel_neon_begin(); aes_ctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes, - walk.iv, buf); + walk.iv); kernel_neon_end(); - tail = nbytes % (STRIDE * AES_BLOCK_SIZE); - if (tail > 0 && tail < AES_BLOCK_SIZE) - /* - * 
The final partial block could not be returned using - * an overlapping store, so it was passed via buf[] - * instead. - */ - memcpy(dst + nbytes - tail, buf, tail); + if (unlikely(nbytes < AES_BLOCK_SIZE)) + memcpy(walk.dst.virt.addr, + buf + sizeof(buf) - nbytes, nbytes); err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index ff01f0167ba2c..dc35eb0245c55 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -321,7 +321,7 @@ AES_FUNC_END(aes_cbc_cts_decrypt) /* * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int bytes, u8 ctr[], u8 finalbuf[]) + * int bytes, u8 ctr[]) */ AES_FUNC_START(aes_ctr_encrypt) @@ -414,8 +414,8 @@ ST5( st1 {v4.16b}, [x0], #16 ) .Lctrtail: /* XOR up to MAX_STRIDE * 16 - 1 bytes of in/output with v0 ... v3/v4 */ mov x16, #16 - ands x13, x4, #0xf - csel x13, x13, x16, ne + ands x6, x4, #0xf + csel x13, x6, x16, ne ST5( cmp w4, #64 - (MAX_STRIDE << 4) ) ST5( csel x14, x16, xzr, gt ) @@ -424,10 +424,10 @@ ST5( csel x14, x16, xzr, gt ) cmp w4, #32 - (MAX_STRIDE << 4) csel x16, x16, xzr, gt cmp w4, #16 - (MAX_STRIDE << 4) - ble .Lctrtail1x adr_l x12, .Lcts_permute_table add x12, x12, x13 + ble .Lctrtail1x ST5( ld1 {v5.16b}, [x1], x14 ) ld1 {v6.16b}, [x1], x15 @@ -462,11 +462,19 @@ ST5( st1 {v5.16b}, [x0], x14 ) b .Lctrout .Lctrtail1x: - csel x0, x0, x6, eq // use finalbuf if less than a full block + sub x7, x6, #16 + csel x6, x6, x7, eq + add x1, x1, x6 + add x0, x0, x6 ld1 {v5.16b}, [x1] + ld1 {v6.16b}, [x0] ST5( mov v3.16b, v4.16b ) encrypt_block v3, w3, x2, x8, w7 + ld1 {v10.16b-v11.16b}, [x12] + tbl v3.16b, {v3.16b}, v10.16b + sshr v11.16b, v11.16b, #7 eor v5.16b, v5.16b, v3.16b + bif v5.16b, v6.16b, v11.16b st1 {v5.16b}, [x0] b .Lctrout AES_FUNC_END(aes_ctr_encrypt) -- GitLab From c8bf850e991a1838964ed8d8426f96cf8d3fbab5 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 27 Jan 2022 12:35:43 +0100 
Subject: [PATCH 0261/1586] crypto: arm/aes-neonbs-ctr - deal with non-multiples of AES block size Instead of falling back to C code to deal with the final bit of input that is not a round multiple of the block size, handle this in the asm code, permitting us to use overlapping loads and stores for performance, and implement the 16-byte wide XOR using a single NEON instruction. Since NEON loads and stores have a natural width of 16 bytes, we need to handle inputs of less than 16 bytes in a special way, but this rarely occurs in practice so it does not impact performance. All other input sizes can be consumed directly by the NEON asm code, although it should be noted that the core AES transform can still only process 128 bytes (8 AES blocks) at a time. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/aes-neonbs-core.S | 105 ++++++++++++++++++------------ arch/arm/crypto/aes-neonbs-glue.c | 35 ++++------ 2 files changed, 77 insertions(+), 63 deletions(-) diff --git a/arch/arm/crypto/aes-neonbs-core.S b/arch/arm/crypto/aes-neonbs-core.S index 7d0cc7f226a50..7b61032f29fad 100644 --- a/arch/arm/crypto/aes-neonbs-core.S +++ b/arch/arm/crypto/aes-neonbs-core.S @@ -758,29 +758,24 @@ ENTRY(aesbs_cbc_decrypt) ENDPROC(aesbs_cbc_decrypt) .macro next_ctr, q - vmov.32 \q\()h[1], r10 + vmov \q\()h, r9, r10 adds r10, r10, #1 - vmov.32 \q\()h[0], r9 adcs r9, r9, #0 - vmov.32 \q\()l[1], r8 + vmov \q\()l, r7, r8 adcs r8, r8, #0 - vmov.32 \q\()l[0], r7 adc r7, r7, #0 vrev32.8 \q, \q .endm /* * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], - * int rounds, int blocks, u8 ctr[], u8 final[]) + * int rounds, int bytes, u8 ctr[]) */ ENTRY(aesbs_ctr_encrypt) mov ip, sp push {r4-r10, lr} - ldm ip, {r5-r7} // load args 4-6 - teq r7, #0 - addne r5, r5, #1 // one extra block if final != 0 - + ldm ip, {r5, r6} // load args 4-5 vld1.8 {q0}, [r6] // load counter vrev32.8 q1, q0 vmov r9, r10, d3 @@ -792,20 +787,19 @@ ENTRY(aesbs_ctr_encrypt) adc r7, r7, #0 99: 
vmov q1, q0 + sub lr, r5, #1 vmov q2, q0 + adr ip, 0f vmov q3, q0 + and lr, lr, #112 vmov q4, q0 + cmp r5, #112 vmov q5, q0 + sub ip, ip, lr, lsl #1 vmov q6, q0 + add ip, ip, lr, lsr #2 vmov q7, q0 - - adr ip, 0f - sub lr, r5, #1 - and lr, lr, #7 - cmp r5, #8 - sub ip, ip, lr, lsl #5 - sub ip, ip, lr, lsl #2 - movlt pc, ip // computed goto if blocks < 8 + movle pc, ip // computed goto if bytes < 112 next_ctr q1 next_ctr q2 @@ -820,12 +814,14 @@ ENTRY(aesbs_ctr_encrypt) bl aesbs_encrypt8 adr ip, 1f - and lr, r5, #7 - cmp r5, #8 - movgt r4, #0 - ldrle r4, [sp, #40] // load final in the last round - sub ip, ip, lr, lsl #2 - movlt pc, ip // computed goto if blocks < 8 + sub lr, r5, #1 + cmp r5, #128 + bic lr, lr, #15 + ands r4, r5, #15 // preserves C flag + teqcs r5, r5 // set Z flag if not last iteration + sub ip, ip, lr, lsr #2 + rsb r4, r4, #16 + movcc pc, ip // computed goto if bytes < 128 vld1.8 {q8}, [r1]! vld1.8 {q9}, [r1]! @@ -834,46 +830,70 @@ ENTRY(aesbs_ctr_encrypt) vld1.8 {q12}, [r1]! vld1.8 {q13}, [r1]! vld1.8 {q14}, [r1]! - teq r4, #0 // skip last block if 'final' -1: bne 2f +1: subne r1, r1, r4 vld1.8 {q15}, [r1]! -2: adr ip, 3f - cmp r5, #8 - sub ip, ip, lr, lsl #3 - movlt pc, ip // computed goto if blocks < 8 + add ip, ip, #2f - 1b veor q0, q0, q8 - vst1.8 {q0}, [r0]! veor q1, q1, q9 - vst1.8 {q1}, [r0]! veor q4, q4, q10 - vst1.8 {q4}, [r0]! veor q6, q6, q11 - vst1.8 {q6}, [r0]! veor q3, q3, q12 - vst1.8 {q3}, [r0]! veor q7, q7, q13 - vst1.8 {q7}, [r0]! veor q2, q2, q14 + bne 3f + veor q5, q5, q15 + + movcc pc, ip // computed goto if bytes < 128 + + vst1.8 {q0}, [r0]! + vst1.8 {q1}, [r0]! + vst1.8 {q4}, [r0]! + vst1.8 {q6}, [r0]! + vst1.8 {q3}, [r0]! + vst1.8 {q7}, [r0]! vst1.8 {q2}, [r0]! - teq r4, #0 // skip last block if 'final' - W(bne) 5f -3: veor q5, q5, q15 +2: subne r0, r0, r4 vst1.8 {q5}, [r0]! 
-4: next_ctr q0 + next_ctr q0 - subs r5, r5, #8 + subs r5, r5, #128 bgt 99b vst1.8 {q0}, [r6] pop {r4-r10, pc} -5: vst1.8 {q5}, [r4] - b 4b +3: adr lr, .Lpermute_table + 16 + cmp r5, #16 // Z flag remains cleared + sub lr, lr, r4 + vld1.8 {q8-q9}, [lr] + vtbl.8 d16, {q5}, d16 + vtbl.8 d17, {q5}, d17 + veor q5, q8, q15 + bcc 4f // have to reload prev if R5 < 16 + vtbx.8 d10, {q2}, d18 + vtbx.8 d11, {q2}, d19 + mov pc, ip // branch back to VST sequence + +4: sub r0, r0, r4 + vshr.s8 q9, q9, #7 // create mask for VBIF + vld1.8 {q8}, [r0] // reload + vbif q5, q8, q9 + vst1.8 {q5}, [r0] + pop {r4-r10, pc} ENDPROC(aesbs_ctr_encrypt) + .align 6 +.Lpermute_table: + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 + .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + .macro next_tweak, out, in, const, tmp vshr.s64 \tmp, \in, #63 vand \tmp, \tmp, \const @@ -888,6 +908,7 @@ ENDPROC(aesbs_ctr_encrypt) * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, * int blocks, u8 iv[], int reorder_last_tweak) */ + .align 6 __xts_prepare8: vld1.8 {q14}, [r7] // load iv vmov.i32 d30, #0x87 // compose tweak mask vector diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c index 5c6cd3c63cbc1..f00f042ef3570 100644 --- a/arch/arm/crypto/aes-neonbs-glue.c +++ b/arch/arm/crypto/aes-neonbs-glue.c @@ -37,7 +37,7 @@ asmlinkage void aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, int blocks, u8 iv[]); asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], - int rounds, int blocks, u8 ctr[], u8 final[]); + int rounds, int blocks, u8 ctr[]); asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, int blocks, u8 iv[], int); @@ -243,32 +243,25 @@ static int 
ctr_encrypt(struct skcipher_request *req) err = skcipher_walk_virt(&walk, req, false); while (walk.nbytes > 0) { - unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; - u8 *final = (walk.total % AES_BLOCK_SIZE) ? buf : NULL; + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + int bytes = walk.nbytes; - if (walk.nbytes < walk.total) { - blocks = round_down(blocks, - walk.stride / AES_BLOCK_SIZE); - final = NULL; - } + if (unlikely(bytes < AES_BLOCK_SIZE)) + src = dst = memcpy(buf + sizeof(buf) - bytes, + src, bytes); + else if (walk.nbytes < walk.total) + bytes &= ~(8 * AES_BLOCK_SIZE - 1); kernel_neon_begin(); - aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->rk, ctx->rounds, blocks, walk.iv, final); + aesbs_ctr_encrypt(dst, src, ctx->rk, ctx->rounds, bytes, walk.iv); kernel_neon_end(); - if (final) { - u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; - u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; + if (unlikely(bytes < AES_BLOCK_SIZE)) + memcpy(walk.dst.virt.addr, + buf + sizeof(buf) - bytes, bytes); - crypto_xor_cpy(dst, src, final, - walk.total % AES_BLOCK_SIZE); - - err = skcipher_walk_done(&walk, 0); - break; - } - err = skcipher_walk_done(&walk, - walk.nbytes - blocks * AES_BLOCK_SIZE); + err = skcipher_walk_done(&walk, walk.nbytes - bytes); } return err; -- GitLab From fc074e130051015e39245a4241956ff122e2f465 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 27 Jan 2022 12:35:44 +0100 Subject: [PATCH 0262/1586] crypto: arm64/aes-neonbs-ctr - fallback to plain NEON for final chunk Instead of processing the entire input with the 8-way bit sliced algorithm, which is sub-optimal for inputs that are not a multiple of 128 bytes in size, invoke the plain NEON version of CTR for the remainder of the input after processing the bulk using 128 byte strides. This allows us to greatly simplify the asm code that implements CTR, and get rid of all the branches and special code paths. 
It also gains us a couple of percent of performance. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-glue.c | 1 + arch/arm64/crypto/aes-neonbs-core.S | 132 ++++++---------------------- arch/arm64/crypto/aes-neonbs-glue.c | 64 ++++++-------- 3 files changed, 55 insertions(+), 142 deletions(-) diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 7d66f8babb1d6..561dd23325711 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -976,6 +976,7 @@ module_cpu_feature_match(AES, aes_init); module_init(aes_init); EXPORT_SYMBOL(neon_aes_ecb_encrypt); EXPORT_SYMBOL(neon_aes_cbc_encrypt); +EXPORT_SYMBOL(neon_aes_ctr_encrypt); EXPORT_SYMBOL(neon_aes_xts_encrypt); EXPORT_SYMBOL(neon_aes_xts_decrypt); #endif diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S index a3405b8c344b5..f2761481181d4 100644 --- a/arch/arm64/crypto/aes-neonbs-core.S +++ b/arch/arm64/crypto/aes-neonbs-core.S @@ -869,133 +869,51 @@ SYM_FUNC_END(aesbs_xts_decrypt) /* * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], - * int rounds, int blocks, u8 iv[], u8 final[]) + * int rounds, int blocks, u8 iv[]) */ SYM_FUNC_START(aesbs_ctr_encrypt) - frame_push 8 + stp x29, x30, [sp, #-16]! 
+ mov x29, sp - mov x19, x0 - mov x20, x1 - mov x21, x2 - mov x22, x3 - mov x23, x4 - mov x24, x5 - mov x25, x6 - - cmp x25, #0 - cset x26, ne - add x23, x23, x26 // do one extra block if final - - ldp x7, x8, [x24] - ld1 {v0.16b}, [x24] + ldp x7, x8, [x5] + ld1 {v0.16b}, [x5] CPU_LE( rev x7, x7 ) CPU_LE( rev x8, x8 ) adds x8, x8, #1 adc x7, x7, xzr -99: mov x9, #1 - lsl x9, x9, x23 - subs w23, w23, #8 - csel x23, x23, xzr, pl - csel x9, x9, xzr, le - - tbnz x9, #1, 0f - next_ctr v1 - tbnz x9, #2, 0f +0: next_ctr v1 next_ctr v2 - tbnz x9, #3, 0f next_ctr v3 - tbnz x9, #4, 0f next_ctr v4 - tbnz x9, #5, 0f next_ctr v5 - tbnz x9, #6, 0f next_ctr v6 - tbnz x9, #7, 0f next_ctr v7 -0: mov bskey, x21 - mov rounds, x22 + mov bskey, x2 + mov rounds, x3 bl aesbs_encrypt8 - lsr x9, x9, x26 // disregard the extra block - tbnz x9, #0, 0f - - ld1 {v8.16b}, [x20], #16 - eor v0.16b, v0.16b, v8.16b - st1 {v0.16b}, [x19], #16 - tbnz x9, #1, 1f - - ld1 {v9.16b}, [x20], #16 - eor v1.16b, v1.16b, v9.16b - st1 {v1.16b}, [x19], #16 - tbnz x9, #2, 2f - - ld1 {v10.16b}, [x20], #16 - eor v4.16b, v4.16b, v10.16b - st1 {v4.16b}, [x19], #16 - tbnz x9, #3, 3f + ld1 { v8.16b-v11.16b}, [x1], #64 + ld1 {v12.16b-v15.16b}, [x1], #64 - ld1 {v11.16b}, [x20], #16 - eor v6.16b, v6.16b, v11.16b - st1 {v6.16b}, [x19], #16 - tbnz x9, #4, 4f + eor v8.16b, v0.16b, v8.16b + eor v9.16b, v1.16b, v9.16b + eor v10.16b, v4.16b, v10.16b + eor v11.16b, v6.16b, v11.16b + eor v12.16b, v3.16b, v12.16b + eor v13.16b, v7.16b, v13.16b + eor v14.16b, v2.16b, v14.16b + eor v15.16b, v5.16b, v15.16b - ld1 {v12.16b}, [x20], #16 - eor v3.16b, v3.16b, v12.16b - st1 {v3.16b}, [x19], #16 - tbnz x9, #5, 5f + st1 { v8.16b-v11.16b}, [x0], #64 + st1 {v12.16b-v15.16b}, [x0], #64 - ld1 {v13.16b}, [x20], #16 - eor v7.16b, v7.16b, v13.16b - st1 {v7.16b}, [x19], #16 - tbnz x9, #6, 6f - - ld1 {v14.16b}, [x20], #16 - eor v2.16b, v2.16b, v14.16b - st1 {v2.16b}, [x19], #16 - tbnz x9, #7, 7f + next_ctr v0 + subs x4, x4, #8 + b.gt 0b - ld1 
{v15.16b}, [x20], #16 - eor v5.16b, v5.16b, v15.16b - st1 {v5.16b}, [x19], #16 - -8: next_ctr v0 - st1 {v0.16b}, [x24] - cbz x23, .Lctr_done - - b 99b - -.Lctr_done: - frame_pop + st1 {v0.16b}, [x5] + ldp x29, x30, [sp], #16 ret - - /* - * If we are handling the tail of the input (x6 != NULL), return the - * final keystream block back to the caller. - */ -0: cbz x25, 8b - st1 {v0.16b}, [x25] - b 8b -1: cbz x25, 8b - st1 {v1.16b}, [x25] - b 8b -2: cbz x25, 8b - st1 {v4.16b}, [x25] - b 8b -3: cbz x25, 8b - st1 {v6.16b}, [x25] - b 8b -4: cbz x25, 8b - st1 {v3.16b}, [x25] - b 8b -5: cbz x25, 8b - st1 {v7.16b}, [x25] - b 8b -6: cbz x25, 8b - st1 {v2.16b}, [x25] - b 8b -7: cbz x25, 8b - st1 {v5.16b}, [x25] - b 8b SYM_FUNC_END(aesbs_ctr_encrypt) diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index 8df6ad8cb09d6..3189003e1cbe5 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -34,7 +34,7 @@ asmlinkage void aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, int blocks, u8 iv[]); asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], - int rounds, int blocks, u8 iv[], u8 final[]); + int rounds, int blocks, u8 iv[]); asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, int blocks, u8 iv[]); @@ -46,6 +46,8 @@ asmlinkage void neon_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, int blocks); asmlinkage void neon_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, int blocks, u8 iv[]); +asmlinkage void neon_aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[], + int rounds, int bytes, u8 ctr[]); asmlinkage void neon_aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds, int bytes, u32 const rk2[], u8 iv[], int first); @@ -58,7 +60,7 @@ struct aesbs_ctx { int rounds; } __aligned(AES_BLOCK_SIZE); -struct aesbs_cbc_ctx { +struct aesbs_cbc_ctr_ctx { struct aesbs_ctx key; u32 
enc[AES_MAX_KEYLENGTH_U32]; }; @@ -128,10 +130,10 @@ static int ecb_decrypt(struct skcipher_request *req) return __ecb_crypt(req, aesbs_ecb_decrypt); } -static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key, +static int aesbs_cbc_ctr_setkey(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { - struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + struct aesbs_cbc_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); struct crypto_aes_ctx rk; int err; @@ -154,7 +156,7 @@ static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key, static int cbc_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + struct aesbs_cbc_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); struct skcipher_walk walk; int err; @@ -177,7 +179,7 @@ static int cbc_encrypt(struct skcipher_request *req) static int cbc_decrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + struct aesbs_cbc_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); struct skcipher_walk walk; int err; @@ -205,40 +207,32 @@ static int cbc_decrypt(struct skcipher_request *req) static int ctr_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm); + struct aesbs_cbc_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); struct skcipher_walk walk; - u8 buf[AES_BLOCK_SIZE]; int err; err = skcipher_walk_virt(&walk, req, false); while (walk.nbytes > 0) { - unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; - u8 *final = (walk.total % AES_BLOCK_SIZE) ? 
buf : NULL; - - if (walk.nbytes < walk.total) { - blocks = round_down(blocks, - walk.stride / AES_BLOCK_SIZE); - final = NULL; - } + int blocks = (walk.nbytes / AES_BLOCK_SIZE) & ~7; + int nbytes = walk.nbytes % (8 * AES_BLOCK_SIZE); + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; kernel_neon_begin(); - aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->rk, ctx->rounds, blocks, walk.iv, final); - kernel_neon_end(); - - if (final) { - u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; - u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; - - crypto_xor_cpy(dst, src, final, - walk.total % AES_BLOCK_SIZE); - - err = skcipher_walk_done(&walk, 0); - break; + if (blocks >= 8) { + aesbs_ctr_encrypt(dst, src, ctx->key.rk, ctx->key.rounds, + blocks, walk.iv); + dst += blocks * AES_BLOCK_SIZE; + src += blocks * AES_BLOCK_SIZE; } - err = skcipher_walk_done(&walk, - walk.nbytes - blocks * AES_BLOCK_SIZE); + if (nbytes && walk.nbytes == walk.total) { + neon_aes_ctr_encrypt(dst, src, ctx->enc, ctx->key.rounds, + nbytes, walk.iv); + nbytes = 0; + } + kernel_neon_end(); + err = skcipher_walk_done(&walk, nbytes); } return err; } @@ -402,14 +396,14 @@ static struct skcipher_alg aes_algs[] = { { .base.cra_driver_name = "cbc-aes-neonbs", .base.cra_priority = 250, .base.cra_blocksize = AES_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct aesbs_cbc_ctx), + .base.cra_ctxsize = sizeof(struct aesbs_cbc_ctr_ctx), .base.cra_module = THIS_MODULE, .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, .walksize = 8 * AES_BLOCK_SIZE, .ivsize = AES_BLOCK_SIZE, - .setkey = aesbs_cbc_setkey, + .setkey = aesbs_cbc_ctr_setkey, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, }, { @@ -417,7 +411,7 @@ static struct skcipher_alg aes_algs[] = { { .base.cra_driver_name = "ctr-aes-neonbs", .base.cra_priority = 250, .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct aesbs_ctx), + .base.cra_ctxsize = sizeof(struct aesbs_cbc_ctr_ctx), 
.base.cra_module = THIS_MODULE, .min_keysize = AES_MIN_KEY_SIZE, @@ -425,7 +419,7 @@ static struct skcipher_alg aes_algs[] = { { .chunksize = AES_BLOCK_SIZE, .walksize = 8 * AES_BLOCK_SIZE, .ivsize = AES_BLOCK_SIZE, - .setkey = aesbs_setkey, + .setkey = aesbs_cbc_ctr_setkey, .encrypt = ctr_encrypt, .decrypt = ctr_encrypt, }, { -- GitLab From dfc6031ec917b7c34a7549c3120f841b2e2be6db Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 27 Jan 2022 12:35:45 +0100 Subject: [PATCH 0263/1586] crypto: arm64/aes-neonbs-xts - use plain NEON for non-power-of-2 input sizes Even though the kernel's implementations of AES-XTS were updated to implement ciphertext stealing and can operate on inputs of any size larger than or equal to the AES block size, this feature is rarely used in practice. In fact, in the kernel, AES-XTS is only used to operate on 4096 or 512 byte blocks, which means that not only the ciphertext stealing is effectively dead code, the logic in the bit sliced NEON implementation to deal with fewer than 8 blocks at a time is also never used. Since the bit-sliced NEON driver already depends on the plain NEON version, which is slower but can operate on smaller data quantities more straightforwardly, let's fallback to the plain NEON implementation of XTS for any residual inputs that are not multiples of 128 bytes. This allows us to remove a lot of complicated logic that rarely gets exercised in practice. 
Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-neonbs-core.S | 132 +++++++++------------------- arch/arm64/crypto/aes-neonbs-glue.c | 33 +++---- 2 files changed, 57 insertions(+), 108 deletions(-) diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S index f2761481181d4..d427f4556b6eb 100644 --- a/arch/arm64/crypto/aes-neonbs-core.S +++ b/arch/arm64/crypto/aes-neonbs-core.S @@ -735,119 +735,67 @@ SYM_FUNC_END(aesbs_cbc_decrypt) * int blocks, u8 iv[]) */ SYM_FUNC_START_LOCAL(__xts_crypt8) - mov x6, #1 - lsl x6, x6, x23 - subs w23, w23, #8 - csel x23, x23, xzr, pl - csel x6, x6, xzr, mi + movi v18.2s, #0x1 + movi v19.2s, #0x87 + uzp1 v18.4s, v18.4s, v19.4s + + ld1 {v0.16b-v3.16b}, [x1], #64 + ld1 {v4.16b-v7.16b}, [x1], #64 + + next_tweak v26, v25, v18, v19 + next_tweak v27, v26, v18, v19 + next_tweak v28, v27, v18, v19 + next_tweak v29, v28, v18, v19 + next_tweak v30, v29, v18, v19 + next_tweak v31, v30, v18, v19 + next_tweak v16, v31, v18, v19 + next_tweak v17, v16, v18, v19 - ld1 {v0.16b}, [x20], #16 - next_tweak v26, v25, v30, v31 eor v0.16b, v0.16b, v25.16b - tbnz x6, #1, 0f - - ld1 {v1.16b}, [x20], #16 - next_tweak v27, v26, v30, v31 eor v1.16b, v1.16b, v26.16b - tbnz x6, #2, 0f - - ld1 {v2.16b}, [x20], #16 - next_tweak v28, v27, v30, v31 eor v2.16b, v2.16b, v27.16b - tbnz x6, #3, 0f - - ld1 {v3.16b}, [x20], #16 - next_tweak v29, v28, v30, v31 eor v3.16b, v3.16b, v28.16b - tbnz x6, #4, 0f - - ld1 {v4.16b}, [x20], #16 - str q29, [sp, #.Lframe_local_offset] eor v4.16b, v4.16b, v29.16b - next_tweak v29, v29, v30, v31 - tbnz x6, #5, 0f - - ld1 {v5.16b}, [x20], #16 - str q29, [sp, #.Lframe_local_offset + 16] - eor v5.16b, v5.16b, v29.16b - next_tweak v29, v29, v30, v31 - tbnz x6, #6, 0f + eor v5.16b, v5.16b, v30.16b + eor v6.16b, v6.16b, v31.16b + eor v7.16b, v7.16b, v16.16b - ld1 {v6.16b}, [x20], #16 - str q29, [sp, #.Lframe_local_offset + 32] - eor v6.16b, v6.16b, v29.16b - next_tweak v29, v29, 
v30, v31 - tbnz x6, #7, 0f + stp q16, q17, [sp, #16] - ld1 {v7.16b}, [x20], #16 - str q29, [sp, #.Lframe_local_offset + 48] - eor v7.16b, v7.16b, v29.16b - next_tweak v29, v29, v30, v31 - -0: mov bskey, x21 - mov rounds, x22 + mov bskey, x2 + mov rounds, x3 br x16 SYM_FUNC_END(__xts_crypt8) .macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 - frame_push 6, 64 - - mov x19, x0 - mov x20, x1 - mov x21, x2 - mov x22, x3 - mov x23, x4 - mov x24, x5 + stp x29, x30, [sp, #-48]! + mov x29, sp - movi v30.2s, #0x1 - movi v25.2s, #0x87 - uzp1 v30.4s, v30.4s, v25.4s - ld1 {v25.16b}, [x24] + ld1 {v25.16b}, [x5] -99: adr x16, \do8 +0: adr x16, \do8 bl __xts_crypt8 - ldp q16, q17, [sp, #.Lframe_local_offset] - ldp q18, q19, [sp, #.Lframe_local_offset + 32] + eor v16.16b, \o0\().16b, v25.16b + eor v17.16b, \o1\().16b, v26.16b + eor v18.16b, \o2\().16b, v27.16b + eor v19.16b, \o3\().16b, v28.16b - eor \o0\().16b, \o0\().16b, v25.16b - eor \o1\().16b, \o1\().16b, v26.16b - eor \o2\().16b, \o2\().16b, v27.16b - eor \o3\().16b, \o3\().16b, v28.16b + ldp q24, q25, [sp, #16] - st1 {\o0\().16b}, [x19], #16 - mov v25.16b, v26.16b - tbnz x6, #1, 1f - st1 {\o1\().16b}, [x19], #16 - mov v25.16b, v27.16b - tbnz x6, #2, 1f - st1 {\o2\().16b}, [x19], #16 - mov v25.16b, v28.16b - tbnz x6, #3, 1f - st1 {\o3\().16b}, [x19], #16 - mov v25.16b, v29.16b - tbnz x6, #4, 1f - - eor \o4\().16b, \o4\().16b, v16.16b - eor \o5\().16b, \o5\().16b, v17.16b - eor \o6\().16b, \o6\().16b, v18.16b - eor \o7\().16b, \o7\().16b, v19.16b - - st1 {\o4\().16b}, [x19], #16 - tbnz x6, #5, 1f - st1 {\o5\().16b}, [x19], #16 - tbnz x6, #6, 1f - st1 {\o6\().16b}, [x19], #16 - tbnz x6, #7, 1f - st1 {\o7\().16b}, [x19], #16 + eor v20.16b, \o4\().16b, v29.16b + eor v21.16b, \o5\().16b, v30.16b + eor v22.16b, \o6\().16b, v31.16b + eor v23.16b, \o7\().16b, v24.16b - cbz x23, 1f - st1 {v25.16b}, [x24] + st1 {v16.16b-v19.16b}, [x0], #64 + st1 {v20.16b-v23.16b}, [x0], #64 - b 99b + subs x4, x4, #8 + b.gt 0b -1: st1 {v25.16b}, 
[x24] - frame_pop + st1 {v25.16b}, [x5] + ldp x29, x30, [sp], #48 ret .endm diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index 3189003e1cbe5..bac4cabef6073 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -302,23 +302,18 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt, return err; while (walk.nbytes >= AES_BLOCK_SIZE) { - unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; - - if (walk.nbytes < walk.total || walk.nbytes % AES_BLOCK_SIZE) - blocks = round_down(blocks, - walk.stride / AES_BLOCK_SIZE); - + int blocks = (walk.nbytes / AES_BLOCK_SIZE) & ~7; out = walk.dst.virt.addr; in = walk.src.virt.addr; nbytes = walk.nbytes; kernel_neon_begin(); - if (likely(blocks > 6)) { /* plain NEON is faster otherwise */ - if (first) + if (blocks >= 8) { + if (first == 1) neon_aes_ecb_encrypt(walk.iv, walk.iv, ctx->twkey, ctx->key.rounds, 1); - first = 0; + first = 2; fn(out, in, ctx->key.rk, ctx->key.rounds, blocks, walk.iv); @@ -327,10 +322,17 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt, in += blocks * AES_BLOCK_SIZE; nbytes -= blocks * AES_BLOCK_SIZE; } - - if (walk.nbytes == walk.total && nbytes > 0) - goto xts_tail; - + if (walk.nbytes == walk.total && nbytes > 0) { + if (encrypt) + neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, + ctx->key.rounds, nbytes, + ctx->twkey, walk.iv, first); + else + neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, + ctx->key.rounds, nbytes, + ctx->twkey, walk.iv, first); + nbytes = first = 0; + } kernel_neon_end(); err = skcipher_walk_done(&walk, nbytes); } @@ -355,13 +357,12 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt, nbytes = walk.nbytes; kernel_neon_begin(); -xts_tail: if (encrypt) neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, ctx->key.rounds, - nbytes, ctx->twkey, walk.iv, first ?: 2); + nbytes, ctx->twkey, walk.iv, first); else neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, 
ctx->key.rounds, - nbytes, ctx->twkey, walk.iv, first ?: 2); + nbytes, ctx->twkey, walk.iv, first); kernel_neon_end(); return skcipher_walk_done(&walk, 0); -- GitLab From a43bed8220f26178f0f63bf4c0fc23b7b8b76763 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Fri, 28 Jan 2022 08:02:54 +0100 Subject: [PATCH 0264/1586] hwrng: core - credit entropy for low quality sources of randomness In case the entropy quality is low, there may be less than one bit to credit in the call to add_hwgenerator_randomness(): The number of bytes returned by rng_get_data() multiplied by the current quality (in entropy bits per 1024 bits of input) must be larger than 128 to credit at least one bit. However, imx-rngc.c sets the quality to 19, but may return less than 32 bytes; hid_u2fzero.c sets the quality to 1; and users may override the quality setting manually. In case there is less than one bit to credit, keep track of it and add that credit to the next iteration. Cc: Herbert Xu Cc: Jason A. Donenfeld Signed-off-by: Dominik Brodowski Signed-off-by: Herbert Xu --- drivers/char/hw_random/core.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index bc9f95cbac921..f327f7493585e 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -424,6 +424,7 @@ static int __init register_miscdev(void) static int hwrng_fillfn(void *unused) { + size_t entropy, entropy_credit = 0; /* in 1/1024 of a bit */ long rc; while (!kthread_should_stop()) { @@ -445,9 +446,17 @@ static int hwrng_fillfn(void *unused) msleep_interruptible(10000); continue; } + + /* If we cannot credit at least one bit of entropy, + * keep track of the remainder for the next iteration + */ + entropy = rc * current_quality * 8 + entropy_credit; + if ((entropy >> 10) == 0) + entropy_credit = entropy; + /* Outside lock, sure, but y'know: randomness. 
*/ add_hwgenerator_randomness((void *)rng_fillbuf, rc, - rc * current_quality * 8 >> 10); + entropy >> 10); } hwrng_fill = NULL; return 0; -- GitLab From 16d20a08f15ee6ad9a4c1e17ede613927d1c553e Mon Sep 17 00:00:00 2001 From: Kavyasree Kotagiri Date: Fri, 28 Jan 2022 12:47:55 +0530 Subject: [PATCH 0265/1586] crypto: atmel - add support for AES and SHA IPs available on lan966x SoC This patch adds support for hardware version of AES and SHA IPs available on lan966x SoC. Signed-off-by: Kavyasree Kotagiri Reviewed-by: Tudor Ambarus Tested-by: Tudor Ambarus Signed-off-by: Herbert Xu --- drivers/crypto/atmel-aes.c | 1 + drivers/crypto/atmel-sha.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c index fe05584031914..f72c6b3e4ad81 100644 --- a/drivers/crypto/atmel-aes.c +++ b/drivers/crypto/atmel-aes.c @@ -2509,6 +2509,7 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd) /* keep only major version number */ switch (dd->hw_version & 0xff0) { + case 0x700: case 0x500: dd->caps.has_dualbuff = 1; dd->caps.has_cfb64 = 1; diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c index 1b13f601fd959..d1628112dacc1 100644 --- a/drivers/crypto/atmel-sha.c +++ b/drivers/crypto/atmel-sha.c @@ -2508,6 +2508,7 @@ static void atmel_sha_get_cap(struct atmel_sha_dev *dd) /* keep only major version number */ switch (dd->hw_version & 0xff0) { + case 0x700: case 0x510: dd->caps.has_dma = 1; dd->caps.has_dualbuff = 1; -- GitLab From 388ac25efc8ce3bf9768ce7bf24268d6fac285d5 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Fri, 28 Jan 2022 16:38:35 +0800 Subject: [PATCH 0266/1586] crypto: tcrypt - remove all multibuffer ahash tests The multibuffer algorithms was removed already in 2018, so it is necessary to clear the test code left by tcrypt. 
Suggested-by: Herbert Xu Signed-off-by: Tianjia Zhang Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 224 ------------------------------------------------ 1 file changed, 224 deletions(-) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 82b5eef2246a2..2a808e843de52 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -724,200 +724,6 @@ static inline int do_one_ahash_op(struct ahash_request *req, int ret) return crypto_wait_req(ret, wait); } -struct test_mb_ahash_data { - struct scatterlist sg[XBUFSIZE]; - char result[64]; - struct ahash_request *req; - struct crypto_wait wait; - char *xbuf[XBUFSIZE]; -}; - -static inline int do_mult_ahash_op(struct test_mb_ahash_data *data, u32 num_mb, - int *rc) -{ - int i, err = 0; - - /* Fire up a bunch of concurrent requests */ - for (i = 0; i < num_mb; i++) - rc[i] = crypto_ahash_digest(data[i].req); - - /* Wait for all requests to finish */ - for (i = 0; i < num_mb; i++) { - rc[i] = crypto_wait_req(rc[i], &data[i].wait); - - if (rc[i]) { - pr_info("concurrent request %d error %d\n", i, rc[i]); - err = rc[i]; - } - } - - return err; -} - -static int test_mb_ahash_jiffies(struct test_mb_ahash_data *data, int blen, - int secs, u32 num_mb) -{ - unsigned long start, end; - int bcount; - int ret = 0; - int *rc; - - rc = kcalloc(num_mb, sizeof(*rc), GFP_KERNEL); - if (!rc) - return -ENOMEM; - - for (start = jiffies, end = start + secs * HZ, bcount = 0; - time_before(jiffies, end); bcount++) { - ret = do_mult_ahash_op(data, num_mb, rc); - if (ret) - goto out; - } - - pr_cont("%d operations in %d seconds (%llu bytes)\n", - bcount * num_mb, secs, (u64)bcount * blen * num_mb); - -out: - kfree(rc); - return ret; -} - -static int test_mb_ahash_cycles(struct test_mb_ahash_data *data, int blen, - u32 num_mb) -{ - unsigned long cycles = 0; - int ret = 0; - int i; - int *rc; - - rc = kcalloc(num_mb, sizeof(*rc), GFP_KERNEL); - if (!rc) - return -ENOMEM; - - /* Warm-up run. 
*/ - for (i = 0; i < 4; i++) { - ret = do_mult_ahash_op(data, num_mb, rc); - if (ret) - goto out; - } - - /* The real thing. */ - for (i = 0; i < 8; i++) { - cycles_t start, end; - - start = get_cycles(); - ret = do_mult_ahash_op(data, num_mb, rc); - end = get_cycles(); - - if (ret) - goto out; - - cycles += end - start; - } - - pr_cont("1 operation in %lu cycles (%d bytes)\n", - (cycles + 4) / (8 * num_mb), blen); - -out: - kfree(rc); - return ret; -} - -static void test_mb_ahash_speed(const char *algo, unsigned int secs, - struct hash_speed *speed, u32 num_mb) -{ - struct test_mb_ahash_data *data; - struct crypto_ahash *tfm; - unsigned int i, j, k; - int ret; - - data = kcalloc(num_mb, sizeof(*data), GFP_KERNEL); - if (!data) - return; - - tfm = crypto_alloc_ahash(algo, 0, 0); - if (IS_ERR(tfm)) { - pr_err("failed to load transform for %s: %ld\n", - algo, PTR_ERR(tfm)); - goto free_data; - } - - for (i = 0; i < num_mb; ++i) { - if (testmgr_alloc_buf(data[i].xbuf)) - goto out; - - crypto_init_wait(&data[i].wait); - - data[i].req = ahash_request_alloc(tfm, GFP_KERNEL); - if (!data[i].req) { - pr_err("alg: hash: Failed to allocate request for %s\n", - algo); - goto out; - } - - ahash_request_set_callback(data[i].req, 0, crypto_req_done, - &data[i].wait); - - sg_init_table(data[i].sg, XBUFSIZE); - for (j = 0; j < XBUFSIZE; j++) { - sg_set_buf(data[i].sg + j, data[i].xbuf[j], PAGE_SIZE); - memset(data[i].xbuf[j], 0xff, PAGE_SIZE); - } - } - - pr_info("\ntesting speed of multibuffer %s (%s)\n", algo, - get_driver_name(crypto_ahash, tfm)); - - for (i = 0; speed[i].blen != 0; i++) { - /* For some reason this only tests digests. 
*/ - if (speed[i].blen != speed[i].plen) - continue; - - if (speed[i].blen > XBUFSIZE * PAGE_SIZE) { - pr_err("template (%u) too big for tvmem (%lu)\n", - speed[i].blen, XBUFSIZE * PAGE_SIZE); - goto out; - } - - if (klen) - crypto_ahash_setkey(tfm, tvmem[0], klen); - - for (k = 0; k < num_mb; k++) - ahash_request_set_crypt(data[k].req, data[k].sg, - data[k].result, speed[i].blen); - - pr_info("test%3u " - "(%5u byte blocks,%5u bytes per update,%4u updates): ", - i, speed[i].blen, speed[i].plen, - speed[i].blen / speed[i].plen); - - if (secs) { - ret = test_mb_ahash_jiffies(data, speed[i].blen, secs, - num_mb); - cond_resched(); - } else { - ret = test_mb_ahash_cycles(data, speed[i].blen, num_mb); - } - - - if (ret) { - pr_err("At least one hashing failed ret=%d\n", ret); - break; - } - } - -out: - for (k = 0; k < num_mb; ++k) - ahash_request_free(data[k].req); - - for (k = 0; k < num_mb; ++k) - testmgr_free_buf(data[k].xbuf); - - crypto_free_ahash(tfm); - -free_data: - kfree(data); -} - static int test_ahash_jiffies_digest(struct ahash_request *req, int blen, char *out, int secs) { @@ -2574,36 +2380,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) test_ahash_speed("sm3", sec, generic_hash_speed_template); if (mode > 400 && mode < 500) break; fallthrough; - case 450: - test_mb_ahash_speed("sha1", sec, generic_hash_speed_template, - num_mb); - if (mode > 400 && mode < 500) break; - fallthrough; - case 451: - test_mb_ahash_speed("sha256", sec, generic_hash_speed_template, - num_mb); - if (mode > 400 && mode < 500) break; - fallthrough; - case 452: - test_mb_ahash_speed("sha512", sec, generic_hash_speed_template, - num_mb); - if (mode > 400 && mode < 500) break; - fallthrough; - case 453: - test_mb_ahash_speed("sm3", sec, generic_hash_speed_template, - num_mb); - if (mode > 400 && mode < 500) break; - fallthrough; - case 454: - test_mb_ahash_speed("streebog256", sec, - generic_hash_speed_template, num_mb); - if (mode > 400 && mode < 500) 
break; - fallthrough; - case 455: - test_mb_ahash_speed("streebog512", sec, - generic_hash_speed_template, num_mb); - if (mode > 400 && mode < 500) break; - fallthrough; case 499: break; -- GitLab From 2d841af23ae8f398c85dd1ff2dc24b5ec8ba4569 Mon Sep 17 00:00:00 2001 From: Shijith Thotton Date: Fri, 28 Jan 2022 19:27:42 +0530 Subject: [PATCH 0267/1586] crypto: octeontx2 - remove CONFIG_DM_CRYPT check No issues were found while using the driver with dm-crypt enabled. So CONFIG_DM_CRYPT check in the driver can be removed. This also fixes the NULL pointer dereference in driver release if CONFIG_DM_CRYPT is enabled. ... Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008 ... Call trace: crypto_unregister_alg+0x68/0xfc crypto_unregister_skciphers+0x44/0x60 otx2_cpt_crypto_exit+0x100/0x1a0 otx2_cptvf_remove+0xf8/0x200 pci_device_remove+0x3c/0xd4 __device_release_driver+0x188/0x234 device_release_driver+0x2c/0x4c ... Fixes: 6f03f0e8b6c8 ("crypto: octeontx2 - register with linux crypto framework") Signed-off-by: Shijith Thotton Signed-off-by: Herbert Xu --- .../crypto/marvell/octeontx2/otx2_cptvf_algs.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c index 2748a3327e391..f8f8542ce3e47 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c @@ -1634,16 +1634,13 @@ static inline int cpt_register_algs(void) { int i, err = 0; - if (!IS_ENABLED(CONFIG_DM_CRYPT)) { - for (i = 0; i < ARRAY_SIZE(otx2_cpt_skciphers); i++) - otx2_cpt_skciphers[i].base.cra_flags &= - ~CRYPTO_ALG_DEAD; - - err = crypto_register_skciphers(otx2_cpt_skciphers, - ARRAY_SIZE(otx2_cpt_skciphers)); - if (err) - return err; - } + for (i = 0; i < ARRAY_SIZE(otx2_cpt_skciphers); i++) + otx2_cpt_skciphers[i].base.cra_flags &= ~CRYPTO_ALG_DEAD; + + err = 
crypto_register_skciphers(otx2_cpt_skciphers, + ARRAY_SIZE(otx2_cpt_skciphers)); + if (err) + return err; for (i = 0; i < ARRAY_SIZE(otx2_cpt_aeads); i++) otx2_cpt_aeads[i].base.cra_flags &= ~CRYPTO_ALG_DEAD; -- GitLab From c717993dd76a1049093af5c262e751d901b8da10 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 30 Jan 2022 22:53:25 +0000 Subject: [PATCH 0268/1586] crypto: marvell/octeontx - remove redundant initialization of variable c_size Variable c_size is being initialized with a value that is never read, it is being re-assigned with a different value later on. The initialization is redundant and can be removed. Signed-off-by: Colin Ian King Signed-off-by: Herbert Xu --- drivers/crypto/marvell/octeontx/otx_cptvf_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_main.c b/drivers/crypto/marvell/octeontx/otx_cptvf_main.c index b681bd2dc6add..36d72e35ebeb6 100644 --- a/drivers/crypto/marvell/octeontx/otx_cptvf_main.c +++ b/drivers/crypto/marvell/octeontx/otx_cptvf_main.c @@ -204,7 +204,6 @@ static int alloc_command_queues(struct otx_cptvf *cptvf, /* per queue initialization */ for (i = 0; i < cptvf->num_queues; i++) { - c_size = 0; rem_q_size = q_size; first = NULL; last = NULL; -- GitLab From 95e26b0391d085bbdbe1a82ffaaf2f92a3f71433 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 30 Jan 2022 23:05:15 +0000 Subject: [PATCH 0269/1586] crypto: sl3516 - remove redundant initializations of pointers in_sg and out_sg Pointers in_sg and out_sg are being initialized with values that are never read, they are being re-assigned the same values later on. The initializations are redundant, remove them in preference to the later assignments that are closer to when the pointers are being used. 
Signed-off-by: Colin Ian King Signed-off-by: Herbert Xu --- drivers/crypto/gemini/sl3516-ce-cipher.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/gemini/sl3516-ce-cipher.c b/drivers/crypto/gemini/sl3516-ce-cipher.c index c1c2b1d866639..53e3fefb81de5 100644 --- a/drivers/crypto/gemini/sl3516-ce-cipher.c +++ b/drivers/crypto/gemini/sl3516-ce-cipher.c @@ -23,8 +23,8 @@ static bool sl3516_ce_need_fallback(struct skcipher_request *areq) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq); struct sl3516_ce_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm); struct sl3516_ce_dev *ce = op->ce; - struct scatterlist *in_sg = areq->src; - struct scatterlist *out_sg = areq->dst; + struct scatterlist *in_sg; + struct scatterlist *out_sg; struct scatterlist *sg; if (areq->cryptlen == 0 || areq->cryptlen % 16) { -- GitLab From f154066b61dfde618d98fdafc8cadde076c7f222 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 6 Feb 2022 09:20:08 -0800 Subject: [PATCH 0270/1586] gcc-plugins/stackleak: Provide verbose mode In order to compare instrumentation between builds, make the verbose mode of the plugin available during the build. This is rarely needed (behind EXPERT) and very noisy (disabled for COMPILE_TEST). 
Cc: Alexander Popov Signed-off-by: Kees Cook --- scripts/Makefile.gcc-plugins | 2 ++ security/Kconfig.hardening | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/scripts/Makefile.gcc-plugins b/scripts/Makefile.gcc-plugins index 1d16ca1b78c91..f67153b260c08 100644 --- a/scripts/Makefile.gcc-plugins +++ b/scripts/Makefile.gcc-plugins @@ -37,6 +37,8 @@ gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STACKLEAK) \ += -fplugin-arg-stackleak_plugin-track-min-size=$(CONFIG_STACKLEAK_TRACK_MIN_SIZE) gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STACKLEAK) \ += -fplugin-arg-stackleak_plugin-arch=$(SRCARCH) +gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STACKLEAK_VERBOSE) \ + += -fplugin-arg-stackleak_plugin-verbose ifdef CONFIG_GCC_PLUGIN_STACKLEAK DISABLE_STACKLEAK_PLUGIN += -fplugin-arg-stackleak_plugin-disable endif diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index d051f8ceefddd..ded4d7c0d1322 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -174,6 +174,16 @@ config GCC_PLUGIN_STACKLEAK * https://grsecurity.net/ * https://pax.grsecurity.net/ +config GCC_PLUGIN_STACKLEAK_VERBOSE + bool "Report stack depth analysis instrumentation" if EXPERT + depends on GCC_PLUGIN_STACKLEAK + depends on !COMPILE_TEST # too noisy + help + This option will cause a warning to be printed each time the + stackleak plugin finds a function it thinks needs to be + instrumented. This is useful for comparing coverage between + builds. + config STACKLEAK_TRACK_MIN_SIZE int "Minimum stack frame size of functions tracked by STACKLEAK" default 100 -- GitLab From 27e9faf415dbf94af19b9c827842435edbc1fbbc Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 6 Feb 2022 09:08:20 -0800 Subject: [PATCH 0271/1586] gcc-plugins/stackleak: Exactly match strings instead of prefixes Since STRING_CST may not be NUL terminated, strncmp() was used to check for equality. However, this may lead to mismatches for longer section names where the start matches the tested-for string.
Test for exact equality by checking for the presences of NUL termination. Cc: Alexander Popov Signed-off-by: Kees Cook --- scripts/gcc-plugins/stackleak_plugin.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/scripts/gcc-plugins/stackleak_plugin.c b/scripts/gcc-plugins/stackleak_plugin.c index e9db7dcb3e5f4..b04aa8e91a41f 100644 --- a/scripts/gcc-plugins/stackleak_plugin.c +++ b/scripts/gcc-plugins/stackleak_plugin.c @@ -429,6 +429,23 @@ static unsigned int stackleak_cleanup_execute(void) return 0; } +/* + * STRING_CST may or may not be NUL terminated: + * https://gcc.gnu.org/onlinedocs/gccint/Constant-expressions.html + */ +static inline bool string_equal(tree node, const char *string, int length) +{ + if (TREE_STRING_LENGTH(node) < length) + return false; + if (TREE_STRING_LENGTH(node) > length + 1) + return false; + if (TREE_STRING_LENGTH(node) == length + 1 && + TREE_STRING_POINTER(node)[length] != '\0') + return false; + return !memcmp(TREE_STRING_POINTER(node), string, length); +} +#define STRING_EQUAL(node, str) string_equal(node, str, strlen(str)) + static bool stackleak_gate(void) { tree section; @@ -438,13 +455,13 @@ static bool stackleak_gate(void) if (section && TREE_VALUE(section)) { section = TREE_VALUE(TREE_VALUE(section)); - if (!strncmp(TREE_STRING_POINTER(section), ".init.text", 10)) + if (STRING_EQUAL(section, ".init.text")) return false; - if (!strncmp(TREE_STRING_POINTER(section), ".devinit.text", 13)) + if (STRING_EQUAL(section, ".devinit.text")) return false; - if (!strncmp(TREE_STRING_POINTER(section), ".cpuinit.text", 13)) + if (STRING_EQUAL(section, ".cpuinit.text")) return false; - if (!strncmp(TREE_STRING_POINTER(section), ".meminit.text", 13)) + if (STRING_EQUAL(section, ".meminit.text")) return false; } -- GitLab From ae978009fc013e3166c9f523f8b17e41a3c0286e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 6 Feb 2022 09:12:50 -0800 Subject: [PATCH 0272/1586] gcc-plugins/stackleak: Ignore 
.noinstr.text and .entry.text The .noinstr.text section functions may not have "current()" sanely available. Similarly true for .entry.text, though such a check is currently redundant. Add a check for both. In an x86_64 defconfig build, the following functions no longer receive stackleak instrumentation: __do_fast_syscall_32() do_int80_syscall_32() do_machine_check() do_syscall_64() exc_general_protection() fixup_bad_iret() Suggested-by: Peter Zijlstra Cc: Alexander Popov Signed-off-by: Kees Cook --- scripts/gcc-plugins/stackleak_plugin.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/gcc-plugins/stackleak_plugin.c b/scripts/gcc-plugins/stackleak_plugin.c index b04aa8e91a41f..42f0252ee2a4e 100644 --- a/scripts/gcc-plugins/stackleak_plugin.c +++ b/scripts/gcc-plugins/stackleak_plugin.c @@ -463,6 +463,10 @@ static bool stackleak_gate(void) return false; if (STRING_EQUAL(section, ".meminit.text")) return false; + if (STRING_EQUAL(section, ".noinstr.text")) + return false; + if (STRING_EQUAL(section, ".entry.text")) + return false; } return track_frame_size >= 0; -- GitLab From 5a6bbd1d18cabf5a680e726f0ef8f6dda0105fe8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 3 Nov 2021 14:14:53 +0000 Subject: [PATCH 0273/1586] dt-bindings: arm-pmu: Document Apple PMU compatible strings As we are about to add support for the Apple PMUs, document the compatible strings associated with the two micro-architectures present in the Apple M1.
Acked-by: Rob Herring Reviewed-by: Hector Martin Signed-off-by: Marc Zyngier --- Documentation/devicetree/bindings/arm/pmu.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/arm/pmu.yaml b/Documentation/devicetree/bindings/arm/pmu.yaml index 981bac4516988..7a04b8aaaec30 100644 --- a/Documentation/devicetree/bindings/arm/pmu.yaml +++ b/Documentation/devicetree/bindings/arm/pmu.yaml @@ -20,6 +20,8 @@ properties: items: - enum: - apm,potenza-pmu + - apple,firestorm-pmu + - apple,icestorm-pmu - arm,armv8-pmuv3 # Only for s/w models - arm,arm1136-pmu - arm,arm1176-pmu -- GitLab From 74703b13f9d2ef286ef588f29295a2fd30b5f295 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 1 Nov 2021 19:58:42 +0000 Subject: [PATCH 0274/1586] dt-bindings: apple,aic: Add CPU PMU per-cpu pseudo-interrupts Advertise the two pseudo-interrupts that are tied to the two PMU flavours present in the Apple M1 SoC. We choose to expose two different pseudo-interrupts to the OS as the e-core PMU is obviously different from the p-core one, effectively presenting two different devices. Acked-by: Rob Herring Reviewed-by: Hector Martin Signed-off-by: Marc Zyngier --- .../devicetree/bindings/interrupt-controller/apple,aic.yaml | 2 ++ include/dt-bindings/interrupt-controller/apple-aic.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml b/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml index 97359024709af..c7577d401786c 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml @@ -56,6 +56,8 @@ properties: - 1: virtual HV timer - 2: physical guest timer - 3: virtual guest timer + - 4: 'efficient' CPU PMU + - 5: 'performance' CPU PMU The 3rd cell contains the interrupt flags. This is normally IRQ_TYPE_LEVEL_HIGH (4).
diff --git a/include/dt-bindings/interrupt-controller/apple-aic.h b/include/dt-bindings/interrupt-controller/apple-aic.h index 604f2bb30ac0d..bf3aac0e54915 100644 --- a/include/dt-bindings/interrupt-controller/apple-aic.h +++ b/include/dt-bindings/interrupt-controller/apple-aic.h @@ -11,5 +11,7 @@ #define AIC_TMR_HV_VIRT 1 #define AIC_TMR_GUEST_PHYS 2 #define AIC_TMR_GUEST_VIRT 3 +#define AIC_CPU_PMU_E 4 +#define AIC_CPU_PMU_P 5 #endif -- GitLab From dba07ad11384d6a4ece4acda1fbe726222ca7ad0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 14 Dec 2021 16:49:04 +0000 Subject: [PATCH 0275/1586] dt-bindings: apple,aic: Add affinity description for per-cpu pseudo-interrupts Some of the FIQ per-cpu pseudo-interrupts are better described with a specific affinity, the most obvious candidate being the CPU PMUs. Augment the AIC binding to be able to specify that affinity in the interrupt controller node. Reviewed-by: Rob Herring Signed-off-by: Marc Zyngier --- .../interrupt-controller/apple,aic.yaml | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml b/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml index c7577d401786c..85c85b694217c 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml @@ -70,6 +70,35 @@ properties: power-domains: maxItems: 1 + affinities: + type: object + additionalProperties: false + description: + FIQ affinity can be expressed as a single "affinities" node, + containing a set of sub-nodes, one per FIQ with a non-default + affinity. + patternProperties: + "^.+-affinity$": + type: object + additionalProperties: false + properties: + apple,fiq-index: + description: + The interrupt number specified as a FIQ, and for which + the affinity is not the default. 
+ $ref: /schemas/types.yaml#/definitions/uint32 + maximum: 5 + + cpus: + $ref: /schemas/types.yaml#/definitions/phandle-array + description: + Should be a list of phandles to CPU nodes (as described in + Documentation/devicetree/bindings/arm/cpus.yaml). + + required: + - fiq-index + - cpus + required: - compatible - '#interrupt-cells' -- GitLab From a5e8801202b318622ea526aa5625e5f7eceb4d26 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 3 Nov 2021 13:35:25 +0000 Subject: [PATCH 0276/1586] irqchip/apple-aic: Parse FIQ affinities from device-tree In order to be able to tell the core IRQ code about the affinity of the PMU interrupt in later patches, parse the affinities kindly provided in the device-tree. Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-apple-aic.c | 49 +++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c index 38091ebb94033..22d9b2058612b 100644 --- a/drivers/irqchip/irq-apple-aic.c +++ b/drivers/irqchip/irq-apple-aic.c @@ -177,6 +177,9 @@ struct aic_irq_chip { void __iomem *base; struct irq_domain *hw_domain; struct irq_domain *ipi_domain; + struct { + cpumask_t aff; + } *fiq_aff[AIC_NR_FIQ]; int nr_hw; }; @@ -793,12 +796,50 @@ static struct gic_kvm_info vgic_info __initdata = { .no_hw_deactivation = true, }; +static void build_fiq_affinity(struct aic_irq_chip *ic, struct device_node *aff) +{ + int i, n; + u32 fiq; + + if (of_property_read_u32(aff, "apple,fiq-index", &fiq) || + WARN_ON(fiq >= AIC_NR_FIQ) || ic->fiq_aff[fiq]) + return; + + n = of_property_count_elems_of_size(aff, "cpus", sizeof(u32)); + if (WARN_ON(n < 0)) + return; + + ic->fiq_aff[fiq] = kzalloc(sizeof(ic->fiq_aff[fiq]), GFP_KERNEL); + if (!ic->fiq_aff[fiq]) + return; + + for (i = 0; i < n; i++) { + struct device_node *cpu_node; + u32 cpu_phandle; + int cpu; + + if (of_property_read_u32_index(aff, "cpus", i, &cpu_phandle)) + continue; + + cpu_node = 
of_find_node_by_phandle(cpu_phandle); + if (WARN_ON(!cpu_node)) + continue; + + cpu = of_cpu_node_to_id(cpu_node); + if (WARN_ON(cpu < 0)) + continue; + + cpumask_set_cpu(cpu, &ic->fiq_aff[fiq]->aff); + } +} + static int __init aic_of_ic_init(struct device_node *node, struct device_node *parent) { int i; void __iomem *regs; u32 info; struct aic_irq_chip *irqc; + struct device_node *affs; regs = of_iomap(node, 0); if (WARN_ON(!regs)) @@ -832,6 +873,14 @@ static int __init aic_of_ic_init(struct device_node *node, struct device_node *p return -ENODEV; } + affs = of_get_child_by_name(node, "affinities"); + if (affs) { + struct device_node *chld; + + for_each_child_of_node(affs, chld) + build_fiq_affinity(irqc, chld); + } + set_handle_irq(aic_handle_irq); set_handle_fiq(aic_handle_fiq); -- GitLab From c7708816c9442beb32488e07b0fb47b6f66577cb Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 1 Nov 2021 19:59:20 +0000 Subject: [PATCH 0277/1586] irqchip/apple-aic: Wire PMU interrupts Add the necessary code to configure the P and E-core PMU interrupts with their respective affinities. When such an interrupt fires, map it onto the right pseudo-interrupt.
Reviewed-by: Hector Martin Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-apple-aic.c | 34 +++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c index 22d9b2058612b..873544e58676f 100644 --- a/drivers/irqchip/irq-apple-aic.c +++ b/drivers/irqchip/irq-apple-aic.c @@ -155,7 +155,7 @@ #define SYS_IMP_APL_UPMSR_EL1 sys_reg(3, 7, 15, 6, 4) #define UPMSR_IACT BIT(0) -#define AIC_NR_FIQ 4 +#define AIC_NR_FIQ 6 #define AIC_NR_SWIPI 32 /* @@ -415,16 +415,15 @@ static void __exception_irq_entry aic_handle_fiq(struct pt_regs *regs) aic_irqc->nr_hw + AIC_TMR_EL02_VIRT); } - if ((read_sysreg_s(SYS_IMP_APL_PMCR0_EL1) & (PMCR0_IMODE | PMCR0_IACT)) == - (FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_FIQ) | PMCR0_IACT)) { - /* - * Not supported yet, let's figure out how to handle this when - * we implement these proprietary performance counters. For now, - * just mask it and move on. - */ - pr_err_ratelimited("PMC FIQ fired. 
Masking.\n"); - sysreg_clear_set_s(SYS_IMP_APL_PMCR0_EL1, PMCR0_IMODE | PMCR0_IACT, - FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_OFF)); + if (read_sysreg_s(SYS_IMP_APL_PMCR0_EL1) & PMCR0_IACT) { + int irq; + if (cpumask_test_cpu(smp_processor_id(), + &aic_irqc->fiq_aff[AIC_CPU_PMU_P]->aff)) + irq = AIC_CPU_PMU_P; + else + irq = AIC_CPU_PMU_E; + generic_handle_domain_irq(aic_irqc->hw_domain, + aic_irqc->nr_hw + irq); } if (FIELD_GET(UPMCR0_IMODE, read_sysreg_s(SYS_IMP_APL_UPMCR0_EL1)) == UPMCR0_IMODE_FIQ && @@ -464,7 +463,18 @@ static int aic_irq_domain_map(struct irq_domain *id, unsigned int irq, handle_fasteoi_irq, NULL, NULL); irqd_set_single_target(irq_desc_get_irq_data(irq_to_desc(irq))); } else { - irq_set_percpu_devid(irq); + int fiq = hw - ic->nr_hw; + + switch (fiq) { + case AIC_CPU_PMU_P: + case AIC_CPU_PMU_E: + irq_set_percpu_devid_partition(irq, &ic->fiq_aff[fiq]->aff); + break; + default: + irq_set_percpu_devid(irq); + break; + } + irq_domain_set_info(id, irq, hw, &fiq_chip, id->host_data, handle_percpu_devid_irq, NULL, NULL); } -- GitLab From 1852e22b318b8d1c02b574da679b1b74f3686090 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 14 Dec 2021 15:56:55 +0000 Subject: [PATCH 0278/1586] arm64: dts: apple: Add t8103 PMU interrupt affinities The two PMU pseudo interrupts have specific affinities. One set is affine to the small cores, and the other set affine to the big ones. 
Signed-off-by: Marc Zyngier --- arch/arm64/boot/dts/apple/t8103.dtsi | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/arm64/boot/dts/apple/t8103.dtsi b/arch/arm64/boot/dts/apple/t8103.dtsi index 19afbc91020a2..a2e006538c568 100644 --- a/arch/arm64/boot/dts/apple/t8103.dtsi +++ b/arch/arm64/boot/dts/apple/t8103.dtsi @@ -213,6 +213,18 @@ interrupt-controller; reg = <0x2 0x3b100000 0x0 0x8000>; power-domains = <&ps_aic>; + + affinities { + e-core-pmu-affinity { + apple,fiq-index = ; + cpus = <&cpu0 &cpu1 &cpu2 &cpu3>; + }; + + p-core-pmu-affinity { + apple,fiq-index = ; + cpus = <&cpu4 &cpu5 &cpu6 &cpu7>; + }; + }; }; pmgr: power-management@23b700000 { -- GitLab From 0f522efcd79634a6113195842ee763dc6ebacfbb Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 2 Nov 2021 17:09:49 +0000 Subject: [PATCH 0279/1586] arm64: dts: apple: Add t8103 PMU nodes Advertise the two PMU nodes for the t8103 SoC. Reviewed-by: Hector Martin Signed-off-by: Marc Zyngier --- arch/arm64/boot/dts/apple/t8103.dtsi | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/arm64/boot/dts/apple/t8103.dtsi b/arch/arm64/boot/dts/apple/t8103.dtsi index a2e006538c568..9f8f4145db880 100644 --- a/arch/arm64/boot/dts/apple/t8103.dtsi +++ b/arch/arm64/boot/dts/apple/t8103.dtsi @@ -97,6 +97,18 @@ ; }; + pmu-e { + compatible = "apple,icestorm-pmu"; + interrupt-parent = <&aic>; + interrupts = ; + }; + + pmu-p { + compatible = "apple,firestorm-pmu"; + interrupt-parent = <&aic>; + interrupts = ; + }; + clkref: clock-ref { compatible = "fixed-clock"; #clock-cells = <0>; -- GitLab From 11db7410cfcba2e5ffed7b8bb2a57d4dd5e22063 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 3 Nov 2021 13:55:19 +0000 Subject: [PATCH 0280/1586] irqchip/apple-aic: Move PMU-specific registers to their own include file As we are about to have a PMU driver, move the PMU bits from the AIC driver into a common include file.
Reviewed-by: Hector Martin Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/apple_m1_pmu.h | 19 +++++++++++++++++++ drivers/irqchip/irq-apple-aic.c | 11 +---------- 2 files changed, 20 insertions(+), 10 deletions(-) create mode 100644 arch/arm64/include/asm/apple_m1_pmu.h diff --git a/arch/arm64/include/asm/apple_m1_pmu.h b/arch/arm64/include/asm/apple_m1_pmu.h new file mode 100644 index 0000000000000..b848af7faadc9 --- /dev/null +++ b/arch/arm64/include/asm/apple_m1_pmu.h @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 + +#ifndef __ASM_APPLE_M1_PMU_h +#define __ASM_APPLE_M1_PMU_h + +#include +#include + +/* Core PMC control register */ +#define SYS_IMP_APL_PMCR0_EL1 sys_reg(3, 1, 15, 0, 0) +#define PMCR0_IMODE GENMASK(10, 8) +#define PMCR0_IMODE_OFF 0 +#define PMCR0_IMODE_PMI 1 +#define PMCR0_IMODE_AIC 2 +#define PMCR0_IMODE_HALT 3 +#define PMCR0_IMODE_FIQ 4 +#define PMCR0_IACT BIT(11) + +#endif /* __ASM_APPLE_M1_PMU_h */ diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c index 873544e58676f..b40199c6625e3 100644 --- a/drivers/irqchip/irq-apple-aic.c +++ b/drivers/irqchip/irq-apple-aic.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include #include @@ -109,16 +110,6 @@ * Note: sysreg-based IPIs are not supported yet. 
*/ -/* Core PMC control register */ -#define SYS_IMP_APL_PMCR0_EL1 sys_reg(3, 1, 15, 0, 0) -#define PMCR0_IMODE GENMASK(10, 8) -#define PMCR0_IMODE_OFF 0 -#define PMCR0_IMODE_PMI 1 -#define PMCR0_IMODE_AIC 2 -#define PMCR0_IMODE_HALT 3 -#define PMCR0_IMODE_FIQ 4 -#define PMCR0_IACT BIT(11) - /* IPI request registers */ #define SYS_IMP_APL_IPI_RR_LOCAL_EL1 sys_reg(3, 5, 15, 0, 0) #define SYS_IMP_APL_IPI_RR_GLOBAL_EL1 sys_reg(3, 5, 15, 0, 1) -- GitLab From 1018a5463a063715365784704c4e8cdf2eec4b04 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 4 Feb 2022 15:19:46 +0800 Subject: [PATCH 0281/1586] f2fs: introduce F2FS_IPU_HONOR_OPU_WRITE ipu policy Once F2FS_IPU_FORCE policy is enabled in some cases: a) f2fs forces to use F2FS_IPU_FORCE in a small-sized volume b) user sets F2FS_IPU_FORCE policy via sysfs Then we may fail to defragment file due to IPU policy check, it doesn't make sense, let's introduce a new IPU policy to allow OPU during file defragmentation. In small-sized volume, let's enable F2FS_IPU_HONOR_OPU_WRITE policy by default. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 3 ++- fs/f2fs/data.c | 18 +++++++++++++----- fs/f2fs/f2fs.h | 3 ++- fs/f2fs/file.c | 18 +++++++++++------- fs/f2fs/segment.h | 5 ++++- fs/f2fs/super.c | 3 ++- 6 files changed, 34 insertions(+), 16 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 87d3884c90ea6..7b50bf82f14dd 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -55,8 +55,9 @@ Description: Controls the in-place-update policy. 0x04 F2FS_IPU_UTIL 0x08 F2FS_IPU_SSR_UTIL 0x10 F2FS_IPU_FSYNC - 0x20 F2FS_IPU_ASYNC, + 0x20 F2FS_IPU_ASYNC 0x40 F2FS_IPU_NOCACHE + 0x80 F2FS_IPU_HONOR_OPU_WRITE ==== ================= Refer segment.h for details. 
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0f124e8de1d4e..6b5f389ba998b 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2460,6 +2460,9 @@ static inline bool check_inplace_update_policy(struct inode *inode, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); unsigned int policy = SM_I(sbi)->ipu_policy; + if (policy & (0x1 << F2FS_IPU_HONOR_OPU_WRITE) && + is_inode_flag_set(inode, FI_OPU_WRITE)) + return false; if (policy & (0x1 << F2FS_IPU_FORCE)) return true; if (policy & (0x1 << F2FS_IPU_SSR) && f2fs_need_SSR(sbi)) @@ -2530,6 +2533,9 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) if (is_inode_flag_set(inode, FI_ALIGNED_WRITE)) return true; + if (is_inode_flag_set(inode, FI_OPU_WRITE)) + return true; + if (fio) { if (page_private_gcing(fio->page)) return true; @@ -3154,8 +3160,8 @@ static int __f2fs_write_data_pages(struct address_space *mapping, f2fs_available_free_memory(sbi, DIRTY_DENTS)) goto skip_write; - /* skip writing during file defragment */ - if (is_inode_flag_set(inode, FI_DO_DEFRAG)) + /* skip writing in file defragment preparing stage */ + if (is_inode_flag_set(inode, FI_SKIP_WRITES)) goto skip_write; trace_f2fs_writepages(mapping->host, wbc, DATA); @@ -3725,6 +3731,7 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, filemap_invalidate_lock(inode->i_mapping); set_inode_flag(inode, FI_ALIGNED_WRITE); + set_inode_flag(inode, FI_OPU_WRITE); for (; secidx < end_sec; secidx++) { f2fs_down_write(&sbi->pin_sem); @@ -3733,7 +3740,7 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); f2fs_unlock_op(sbi); - set_inode_flag(inode, FI_DO_DEFRAG); + set_inode_flag(inode, FI_SKIP_WRITES); for (blkofs = 0; blkofs < blk_per_sec; blkofs++) { struct page *page; @@ -3750,7 +3757,7 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, f2fs_put_page(page, 1); } - clear_inode_flag(inode, FI_DO_DEFRAG); + 
clear_inode_flag(inode, FI_SKIP_WRITES); ret = filemap_fdatawrite(inode->i_mapping); @@ -3761,7 +3768,8 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, } done: - clear_inode_flag(inode, FI_DO_DEFRAG); + clear_inode_flag(inode, FI_SKIP_WRITES); + clear_inode_flag(inode, FI_OPU_WRITE); clear_inode_flag(inode, FI_ALIGNED_WRITE); filemap_invalidate_unlock(inode->i_mapping); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 51c1392708e6e..3b4bf1c3f1ed6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -740,7 +740,8 @@ enum { FI_DROP_CACHE, /* drop dirty page cache */ FI_DATA_EXIST, /* indicate data exists */ FI_INLINE_DOTS, /* indicate inline dot dentries */ - FI_DO_DEFRAG, /* indicate defragment is running */ + FI_SKIP_WRITES, /* should skip data page writeback */ + FI_OPU_WRITE, /* used for opu per file */ FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ FI_PREALLOCATED_ALL, /* all blocks for write were preallocated */ FI_HOT_DATA, /* indicate file is hot */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6ccdd6e347e2c..42fbdcf0ccc9b 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2559,10 +2559,6 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, bool fragmented = false; int err; - /* if in-place-update policy is enabled, don't waste time here */ - if (f2fs_should_update_inplace(inode, NULL)) - return -EINVAL; - pg_start = range->start >> PAGE_SHIFT; pg_end = (range->start + range->len) >> PAGE_SHIFT; @@ -2570,6 +2566,13 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, inode_lock(inode); + /* if in-place-update policy is enabled, don't waste time here */ + set_inode_flag(inode, FI_OPU_WRITE); + if (f2fs_should_update_inplace(inode, NULL)) { + err = -EINVAL; + goto out; + } + /* writeback all dirty pages in the range */ err = filemap_write_and_wait_range(inode->i_mapping, range->start, range->start + range->len - 1); @@ -2651,7 +2654,7 @@ do_map: goto check; } - set_inode_flag(inode, 
FI_DO_DEFRAG); + set_inode_flag(inode, FI_SKIP_WRITES); idx = map.m_lblk; while (idx < map.m_lblk + map.m_len && cnt < blk_per_seg) { @@ -2676,15 +2679,16 @@ check: if (map.m_lblk < pg_end && cnt < blk_per_seg) goto do_map; - clear_inode_flag(inode, FI_DO_DEFRAG); + clear_inode_flag(inode, FI_SKIP_WRITES); err = filemap_fdatawrite(inode->i_mapping); if (err) goto out; } clear_out: - clear_inode_flag(inode, FI_DO_DEFRAG); + clear_inode_flag(inode, FI_SKIP_WRITES); out: + clear_inode_flag(inode, FI_OPU_WRITE); inode_unlock(inode); if (!err) range->len = (u64)total << PAGE_SHIFT; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 0291cd55cf09b..5c94caf0c0a1d 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -651,7 +651,9 @@ static inline int utilization(struct f2fs_sb_info *sbi) * pages over min_fsync_blocks. (=default option) * F2FS_IPU_ASYNC - do IPU given by asynchronous write requests. * F2FS_IPU_NOCACHE - disable IPU bio cache. - * F2FS_IPUT_DISABLE - disable IPU. (=default option in LFS mode) + * F2FS_IPU_HONOR_OPU_WRITE - use OPU write prior to IPU write if inode has + * FI_OPU_WRITE flag. + * F2FS_IPU_DISABLE - disable IPU. 
(=default option in LFS mode) */ #define DEF_MIN_IPU_UTIL 70 #define DEF_MIN_FSYNC_BLOCKS 8 @@ -667,6 +669,7 @@ enum { F2FS_IPU_FSYNC, F2FS_IPU_ASYNC, F2FS_IPU_NOCACHE, + F2FS_IPU_HONOR_OPU_WRITE, }; static inline unsigned int curseg_segno(struct f2fs_sb_info *sbi, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 9af6c20532ece..806836184ebc9 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3957,7 +3957,8 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE; if (f2fs_block_unit_discard(sbi)) sm_i->dcc_info->discard_granularity = 1; - sm_i->ipu_policy = 1 << F2FS_IPU_FORCE; + sm_i->ipu_policy = 1 << F2FS_IPU_FORCE | + 1 << F2FS_IPU_HONOR_OPU_WRITE; } sbi->readdir_ra = 1; -- GitLab From 345be4275cad454ae7e25884369a9c6c25e56279 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 7 Feb 2022 09:38:29 -0700 Subject: [PATCH 0282/1586] thermal: netlink: Fix parameter type of thermal_genl_cpu_capability_event() stub When building with CONFIG_THERMAL_NETLINK=n, there is a spew of warnings along the lines of: In file included from drivers/thermal/thermal_core.c:27: In file included from drivers/thermal/thermal_core.h:15: drivers/thermal/thermal_netlink.h:113:71: warning: declaration of 'struct cpu_capability' will not be visible outside of this function [-Wvisibility] static inline int thermal_genl_cpu_capability_event(int count, struct cpu_capability *caps) ^ 1 warning generated. 'struct cpu_capability' is not forward declared anywhere in the header. As it turns out, this should really be 'struct thermal_genl_cpu_caps', which silences the warning and makes the parameter types of the stub match the full function. Fixes: e4b1eb24ce5a ("thermal: netlink: Add a new event to notify CPU capabilities change") Reported-by: Stephen Rothwell Signed-off-by: Nathan Chancellor Reviewed-by: Srinivas Pandruvada Signed-off-by: Rafael J. 
Wysocki --- drivers/thermal/thermal_netlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/thermal_netlink.h b/drivers/thermal/thermal_netlink.h index 04d1adbbc0124..1052f523188da 100644 --- a/drivers/thermal/thermal_netlink.h +++ b/drivers/thermal/thermal_netlink.h @@ -110,7 +110,7 @@ static inline int thermal_genl_sampling_temp(int id, int temp) return 0; } -static inline int thermal_genl_cpu_capability_event(int count, struct cpu_capability *caps) +static inline int thermal_genl_cpu_capability_event(int count, struct thermal_genl_cpu_caps *caps) { return 0; } -- GitLab From fa31a4d669bd471e9510db1abf9b91e1a6be6ff7 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Thu, 3 Feb 2022 11:43:07 -0800 Subject: [PATCH 0283/1586] x86/cpufeatures: Put the AMX macros in the word 18 block These macros are for bits in CPUID.(EAX=7,ECX=0):EDX, not for bits in CPUID(EAX=7,ECX=1):EAX. Put them with their brethren. [ bp: Sort word 18 bits properly, as caught by Like Xu ] Signed-off-by: Jim Mattson Signed-off-by: Dave Hansen Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20220203194308.2469117-1-jmattson@google.com --- arch/x86/include/asm/cpufeatures.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 6db4e2932b3d8..5cd22090e53de 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -299,9 +299,6 @@ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ -#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */ -#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */ -#define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ #define 
X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ @@ -390,7 +387,10 @@ #define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ +#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */ #define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */ +#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */ +#define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ -- GitLab From 2f04aa69ab5c5c40d2e3e51fd73ce2ecb651e9ba Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Fri, 4 Feb 2022 16:52:40 +0100 Subject: [PATCH 0284/1586] regulator: Add bindings for TPS62864x Add bindings for the TPS62864/TPS62866/TPS62868/TPS62869 voltage regulators.
Signed-off-by: Vincent Whitchurch Link: https://lore.kernel.org/r/20220204155241.576342-2-vincent.whitchurch@axis.com Signed-off-by: Mark Brown --- .../bindings/regulator/ti,tps62864.yaml | 62 +++++++++++++++++++ include/dt-bindings/regulator/ti,tps62864.h | 9 +++ 2 files changed, 71 insertions(+) create mode 100644 Documentation/devicetree/bindings/regulator/ti,tps62864.yaml create mode 100644 include/dt-bindings/regulator/ti,tps62864.h diff --git a/Documentation/devicetree/bindings/regulator/ti,tps62864.yaml b/Documentation/devicetree/bindings/regulator/ti,tps62864.yaml new file mode 100644 index 0000000000000..e3d739a37ab04 --- /dev/null +++ b/Documentation/devicetree/bindings/regulator/ti,tps62864.yaml @@ -0,0 +1,62 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/regulator/ti,tps62864.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: TI TPS62864/TPS62866/TPS62868/TPS62869 voltage regulator + +maintainers: + - Vincent Whitchurch + +properties: + compatible: + enum: + - ti,tps62864 + - ti,tps62866 + - ti,tps62868 + - ti,tps62869 + + reg: + maxItems: 1 + + regulators: + type: object + + patternProperties: + "SW": + type: object + $ref: regulator.yaml# + + additionalProperties: false + +required: + - compatible + - reg + - regulators + +additionalProperties: false + +examples: + - | + #include + i2c { + #address-cells = <1>; + #size-cells = <0>; + + regulator@48 { + compatible = "ti,tps62864"; + reg = <0x48>; + + regulators { + SW { + regulator-name = "+0.85V"; + regulator-min-microvolt = <800000>; + regulator-max-microvolt = <890000>; + regulator-initial-mode = ; + }; + }; + }; + }; + +...
diff --git a/include/dt-bindings/regulator/ti,tps62864.h b/include/dt-bindings/regulator/ti,tps62864.h new file mode 100644 index 0000000000000..8db31f23d9561 --- /dev/null +++ b/include/dt-bindings/regulator/ti,tps62864.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef _DT_BINDINGS_REGULATOR_TI_TPS62864_H +#define _DT_BINDINGS_REGULATOR_TI_TPS62864_H + +#define TPS62864_MODE_NORMAL 0 +#define TPS62864_MODE_FPWM 1 + +#endif -- GitLab From e2a01b4e8806087743e5ee42f9dcedfc741d4112 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Fri, 4 Feb 2022 16:52:41 +0100 Subject: [PATCH 0285/1586] regulator: Add support for TPS6286x TI's TPS62864/TPS62866/TPS62868/TPS62869 are high-frequency synchronous step-down converters controlled via I2C. There are differences in the electrical characteristics and packaging between the variants, but the register interfaces are identical. Signed-off-by: Vincent Whitchurch Link: https://lore.kernel.org/r/20220204155241.576342-3-vincent.whitchurch@axis.com Signed-off-by: Mark Brown --- drivers/regulator/Kconfig | 9 ++ drivers/regulator/Makefile | 1 + drivers/regulator/tps6286x-regulator.c | 159 +++++++++++++++++++++++++ 3 files changed, 169 insertions(+) create mode 100644 drivers/regulator/tps6286x-regulator.c diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index 22503e4f53272..00559c214f570 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -1264,6 +1264,15 @@ config REGULATOR_TPS62360 high-frequency synchronous step down dc-dc converter optimized for battery-powered portable applications. +config REGULATOR_TPS6286X + tristate "TI TPS6286x Power Regulator" + depends on I2C && OF + select REGMAP_I2C + help + This driver supports TPS6286x voltage regulator chips. These are + high-frequency synchronous step-down converters with an I2C + interface.
+ config REGULATOR_TPS65023 tristate "TI TPS65023 Power regulators" depends on I2C diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile index 2e1b087489fa5..4b8794a73e170 100644 --- a/drivers/regulator/Makefile +++ b/drivers/regulator/Makefile @@ -149,6 +149,7 @@ obj-$(CONFIG_REGULATOR_SY8827N) += sy8827n.o obj-$(CONFIG_REGULATOR_TI_ABB) += ti-abb-regulator.o obj-$(CONFIG_REGULATOR_TPS6105X) += tps6105x-regulator.o obj-$(CONFIG_REGULATOR_TPS62360) += tps62360-regulator.o +obj-$(CONFIG_REGULATOR_TPS6286X) += tps6286x-regulator.o obj-$(CONFIG_REGULATOR_TPS65023) += tps65023-regulator.o obj-$(CONFIG_REGULATOR_TPS6507X) += tps6507x-regulator.o obj-$(CONFIG_REGULATOR_TPS65086) += tps65086-regulator.o diff --git a/drivers/regulator/tps6286x-regulator.c b/drivers/regulator/tps6286x-regulator.c new file mode 100644 index 0000000000000..e29deda30d75d --- /dev/null +++ b/drivers/regulator/tps6286x-regulator.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright Axis Communications AB + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define TPS6286X_VOUT1 0x01 +#define TPS6286X_VOUT1_VO1_SET GENMASK(7, 0) + +#define TPS6286X_CONTROL 0x03 +#define TPS6286X_CONTROL_FPWM BIT(4) +#define TPS6286X_CONTROL_SWEN BIT(5) + +#define TPS6286X_MIN_MV 400 +#define TPS6286X_MAX_MV 1675 +#define TPS6286X_STEP_MV 5 + +static const struct regmap_config tps6286x_regmap_config = { + .reg_bits = 8, + .val_bits = 8, +}; + +static int tps6286x_set_mode(struct regulator_dev *rdev, unsigned int mode) +{ + unsigned int val; + + switch (mode) { + case REGULATOR_MODE_NORMAL: + val = 0; + break; + case REGULATOR_MODE_FAST: + val = TPS6286X_CONTROL_FPWM; + break; + default: + return -EINVAL; + } + + return regmap_update_bits(rdev->regmap, TPS6286X_CONTROL, + TPS6286X_CONTROL_FPWM, val); +} + +static unsigned int tps6286x_get_mode(struct regulator_dev *rdev) +{ + unsigned int val; + int ret; + + ret = 
regmap_read(rdev->regmap, TPS6286X_CONTROL, &val); + if (ret < 0) + return 0; + + return (val & TPS6286X_CONTROL_FPWM) ? REGULATOR_MODE_FAST : REGULATOR_MODE_NORMAL; +} + +static const struct regulator_ops tps6286x_regulator_ops = { + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .set_mode = tps6286x_set_mode, + .get_mode = tps6286x_get_mode, + .is_enabled = regulator_is_enabled_regmap, + .set_voltage_sel = regulator_set_voltage_sel_regmap, + .get_voltage_sel = regulator_get_voltage_sel_regmap, + .list_voltage = regulator_list_voltage_linear, +}; + +static unsigned int tps6286x_of_map_mode(unsigned int mode) +{ + switch (mode) { + case TPS62864_MODE_NORMAL: + return REGULATOR_MODE_NORMAL; + case TPS62864_MODE_FPWM: + return REGULATOR_MODE_FAST; + default: + return REGULATOR_MODE_INVALID; + } +} + +static const struct regulator_desc tps6286x_reg = { + .name = "tps6286x", + .of_match = of_match_ptr("SW"), + .owner = THIS_MODULE, + .ops = &tps6286x_regulator_ops, + .of_map_mode = tps6286x_of_map_mode, + .regulators_node = of_match_ptr("regulators"), + .type = REGULATOR_VOLTAGE, + .n_voltages = ((TPS6286X_MAX_MV - TPS6286X_MIN_MV) / TPS6286X_STEP_MV) + 1, + .min_uV = TPS6286X_MIN_MV * 1000, + .uV_step = TPS6286X_STEP_MV * 1000, + .vsel_reg = TPS6286X_VOUT1, + .vsel_mask = TPS6286X_VOUT1_VO1_SET, + .enable_reg = TPS6286X_CONTROL, + .enable_mask = TPS6286X_CONTROL_SWEN, + .ramp_delay = 1000, + /* tDelay + tRamp, rounded up */ + .enable_time = 3000, +}; + +static const struct of_device_id tps6286x_dt_ids[] = { + { .compatible = "ti,tps62864", }, + { .compatible = "ti,tps62866", }, + { .compatible = "ti,tps62868", }, + { .compatible = "ti,tps62869", }, + { } +}; +MODULE_DEVICE_TABLE(of, tps6286x_dt_ids); + +static int tps6286x_i2c_probe(struct i2c_client *i2c, + const struct i2c_device_id *id) +{ + struct device *dev = &i2c->dev; + struct regulator_config config = {}; + struct regulator_dev *rdev; + struct regmap *regmap; + + regmap = 
devm_regmap_init_i2c(i2c, &tps6286x_regmap_config); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); + + config.dev = &i2c->dev; + config.of_node = dev->of_node; + config.regmap = regmap; + + rdev = devm_regulator_register(&i2c->dev, &tps6286x_reg, &config); + if (IS_ERR(rdev)) { + dev_err(&i2c->dev, "Failed to register tps6286x regulator\n"); + return PTR_ERR(rdev); + } + + return 0; +} + +static const struct i2c_device_id tps6286x_i2c_id[] = { + { "tps62864", 0 }, + { "tps62866", 0 }, + { "tps62868", 0 }, + { "tps62869", 0 }, + {}, +}; +MODULE_DEVICE_TABLE(i2c, tps6286x_i2c_id); + +static struct i2c_driver tps6286x_regulator_driver = { + .driver = { + .name = "tps6286x", + .of_match_table = of_match_ptr(tps6286x_dt_ids), + }, + .probe = tps6286x_i2c_probe, + .id_table = tps6286x_i2c_id, +}; + +module_i2c_driver(tps6286x_regulator_driver); + +MODULE_LICENSE("GPL v2"); -- GitLab From 60edd652005fe2931565f658f23f86d094f4c1f0 Mon Sep 17 00:00:00 2001 From: Leilk Liu Date: Tue, 25 Jan 2022 09:23:28 +0800 Subject: [PATCH 0286/1586] spi: Convert spi-slave-mt27xx to json-schema Convert Mediatek ARM SOC's SPI Slave controller binding to json-schema format. 
Signed-off-by: Leilk Liu Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20220125012330.13449-2-leilk.liu@mediatek.com Signed-off-by: Mark Brown --- .../spi/mediatek,spi-slave-mt27xx.yaml | 58 +++++++++++++++++++ .../bindings/spi/spi-slave-mt27xx.txt | 33 ----------- 2 files changed, 58 insertions(+), 33 deletions(-) create mode 100644 Documentation/devicetree/bindings/spi/mediatek,spi-slave-mt27xx.yaml delete mode 100644 Documentation/devicetree/bindings/spi/spi-slave-mt27xx.txt diff --git a/Documentation/devicetree/bindings/spi/mediatek,spi-slave-mt27xx.yaml b/Documentation/devicetree/bindings/spi/mediatek,spi-slave-mt27xx.yaml new file mode 100644 index 0000000000000..7977799a8ee10 --- /dev/null +++ b/Documentation/devicetree/bindings/spi/mediatek,spi-slave-mt27xx.yaml @@ -0,0 +1,58 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/spi/mediatek,spi-slave-mt27xx.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: SPI Slave controller for MediaTek ARM SoCs + +maintainers: + - Leilk Liu + +allOf: + - $ref: "/schemas/spi/spi-controller.yaml#" + +properties: + compatible: + enum: + - mediatek,mt2712-spi-slave + - mediatek,mt8195-spi-slave + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + maxItems: 1 + + clock-names: + items: + - const: spi + +required: + - compatible + - reg + - interrupts + - clocks + - clock-names + +unevaluatedProperties: false + +examples: + - | + #include + #include + #include + #include + + spi@10013000 { + compatible = "mediatek,mt2712-spi-slave"; + reg = <0x10013000 0x100>; + interrupts = ; + clocks = <&infracfg CLK_INFRA_AO_SPI1>; + clock-names = "spi"; + assigned-clocks = <&topckgen CLK_TOP_SPISLV_SEL>; + assigned-clock-parents = <&topckgen CLK_TOP_UNIVPLL1_D2>; + }; diff --git a/Documentation/devicetree/bindings/spi/spi-slave-mt27xx.txt b/Documentation/devicetree/bindings/spi/spi-slave-mt27xx.txt deleted file mode 100644 index 
9192724540fd9..0000000000000 --- a/Documentation/devicetree/bindings/spi/spi-slave-mt27xx.txt +++ /dev/null @@ -1,33 +0,0 @@ -Binding for MTK SPI Slave controller - -Required properties: -- compatible: should be one of the following. - - mediatek,mt2712-spi-slave: for mt2712 platforms - - mediatek,mt8195-spi-slave: for mt8195 platforms -- reg: Address and length of the register set for the device. -- interrupts: Should contain spi interrupt. -- clocks: phandles to input clocks. - It's clock gate, and should be <&infracfg CLK_INFRA_AO_SPI1>. -- clock-names: should be "spi" for the clock gate. - -Optional properties: -- assigned-clocks: it's mux clock, should be <&topckgen CLK_TOP_SPISLV_SEL>. -- assigned-clock-parents: parent of mux clock. - It's PLL, and should be one of the following. - - <&topckgen CLK_TOP_UNIVPLL1_D2>: specify parent clock 312MHZ. - It's the default one. - - <&topckgen CLK_TOP_UNIVPLL1_D4>: specify parent clock 156MHZ. - - <&topckgen CLK_TOP_UNIVPLL2_D4>: specify parent clock 104MHZ. - - <&topckgen CLK_TOP_UNIVPLL1_D8>: specify parent clock 78MHZ. - -Example: -- SoC Specific Portion: -spis1: spi@10013000 { - compatible = "mediatek,mt2712-spi-slave"; - reg = <0 0x10013000 0 0x100>; - interrupts = ; - clocks = <&infracfg CLK_INFRA_AO_SPI1>; - clock-names = "spi"; - assigned-clocks = <&topckgen CLK_TOP_SPISLV_SEL>; - assigned-clock-parents = <&topckgen CLK_TOP_UNIVPLL1_D2>; -}; -- GitLab From 08727dd751234bc878be64e60ea6f0d8ea0d54c0 Mon Sep 17 00:00:00 2001 From: Leilk Liu Date: Tue, 25 Jan 2022 09:23:29 +0800 Subject: [PATCH 0287/1586] spi: Convert spi-mt65xx to json-schema Convert Mediatek ARM SOC's SPI Master controller binding to json-schema format. 
Signed-off-by: Leilk Liu Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20220125012330.13449-3-leilk.liu@mediatek.com Signed-off-by: Mark Brown --- .../bindings/spi/mediatek,spi-mt65xx.yaml | 101 ++++++++++++++++++ .../devicetree/bindings/spi/spi-mt65xx.txt | 68 ------------ 2 files changed, 101 insertions(+), 68 deletions(-) create mode 100644 Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml delete mode 100644 Documentation/devicetree/bindings/spi/spi-mt65xx.txt diff --git a/Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml b/Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml new file mode 100644 index 0000000000000..ea977fba49a77 --- /dev/null +++ b/Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml @@ -0,0 +1,101 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/spi/mediatek,spi-mt65xx.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: SPI Bus controller for MediaTek ARM SoCs + +maintainers: + - Leilk Liu + +allOf: + - $ref: "/schemas/spi/spi-controller.yaml#" + +properties: + compatible: + oneOf: + - items: + - enum: + - mediatek,mt7629-spi + - const: mediatek,mt7622-spi + - items: + - enum: + - mediatek,mt8516-spi + - const: mediatek,mt2712-spi + - items: + - enum: + - mediatek,mt6779-spi + - mediatek,mt8192-spi + - mediatek,mt8195-spi + - const: mediatek,mt6765-spi + - items: + - enum: + - mediatek,mt2701-spi + - mediatek,mt2712-spi + - mediatek,mt6589-spi + - mediatek,mt6765-spi + - mediatek,mt6893-spi + - mediatek,mt7622-spi + - mediatek,mt8135-spi + - mediatek,mt8173-spi + - mediatek,mt8183-spi + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + items: + - description: clock used for the parent clock + - description: clock used for the muxes clock + - description: clock used for the clock gate + + clock-names: + items: + - const: parent-clk + - const: sel-clk + - const: spi-clk + + 
mediatek,pad-select: + $ref: /schemas/types.yaml#/definitions/uint32-array + maxItems: 4 + items: + enum: [0, 1, 2, 3] + description: + specify which pins group(ck/mi/mo/cs) spi controller used. + This is an array. + +required: + - compatible + - reg + - interrupts + - clocks + - clock-names + - '#address-cells' + - '#size-cells' + +unevaluatedProperties: false + +examples: + - | + #include + #include + #include + #include + + spi@1100a000 { + compatible = "mediatek,mt8173-spi"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x1100a000 0x1000>; + interrupts = ; + clocks = <&topckgen CLK_TOP_SYSPLL3_D2>, + <&topckgen CLK_TOP_SPI_SEL>, + <&pericfg CLK_PERI_SPI0>; + clock-names = "parent-clk", "sel-clk", "spi-clk"; + cs-gpios = <&pio 105 GPIO_ACTIVE_LOW>, <&pio 72 GPIO_ACTIVE_LOW>; + mediatek,pad-select = <1>, <0>; + }; diff --git a/Documentation/devicetree/bindings/spi/spi-mt65xx.txt b/Documentation/devicetree/bindings/spi/spi-mt65xx.txt deleted file mode 100644 index 2a24969159cc1..0000000000000 --- a/Documentation/devicetree/bindings/spi/spi-mt65xx.txt +++ /dev/null @@ -1,68 +0,0 @@ -Binding for MTK SPI controller - -Required properties: -- compatible: should be one of the following. 
- - mediatek,mt2701-spi: for mt2701 platforms - - mediatek,mt2712-spi: for mt2712 platforms - - mediatek,mt6589-spi: for mt6589 platforms - - mediatek,mt6765-spi: for mt6765 platforms - - mediatek,mt7622-spi: for mt7622 platforms - - "mediatek,mt7629-spi", "mediatek,mt7622-spi": for mt7629 platforms - - mediatek,mt8135-spi: for mt8135 platforms - - mediatek,mt8173-spi: for mt8173 platforms - - mediatek,mt8183-spi: for mt8183 platforms - - mediatek,mt6893-spi: for mt6893 platforms - - "mediatek,mt8192-spi", "mediatek,mt6765-spi": for mt8192 platforms - - "mediatek,mt8195-spi", "mediatek,mt6765-spi": for mt8195 platforms - - "mediatek,mt8516-spi", "mediatek,mt2712-spi": for mt8516 platforms - - "mediatek,mt6779-spi", "mediatek,mt6765-spi": for mt6779 platforms - -- #address-cells: should be 1. - -- #size-cells: should be 0. - -- reg: Address and length of the register set for the device - -- interrupts: Should contain spi interrupt - -- clocks: phandles to input clocks. - The first should be one of the following. It's PLL. - - <&clk26m>: specify parent clock 26MHZ. - - <&topckgen CLK_TOP_SYSPLL3_D2>: specify parent clock 109MHZ. - It's the default one. - - <&topckgen CLK_TOP_SYSPLL4_D2>: specify parent clock 78MHZ. - - <&topckgen CLK_TOP_UNIVPLL2_D4>: specify parent clock 104MHZ. - - <&topckgen CLK_TOP_UNIVPLL1_D8>: specify parent clock 78MHZ. - The second should be <&topckgen CLK_TOP_SPI_SEL>. It's clock mux. - The third is <&pericfg CLK_PERI_SPI0>. It's clock gate. - -- clock-names: shall be "parent-clk" for the parent clock, "sel-clk" for the - muxes clock, and "spi-clk" for the clock gate. - -Optional properties: --cs-gpios: see spi-bus.txt. - -- mediatek,pad-select: specify which pins group(ck/mi/mo/cs) spi - controller used. This is an array, the element value should be 0~3, - only required for MT8173. - 0: specify GPIO69,70,71,72 for spi pins. - 1: specify GPIO102,103,104,105 for spi pins. - 2: specify GPIO128,129,130,131 for spi pins. 
- 3: specify GPIO5,6,7,8 for spi pins. - -Example: - -- SoC Specific Portion: -spi: spi@1100a000 { - compatible = "mediatek,mt8173-spi"; - #address-cells = <1>; - #size-cells = <0>; - reg = <0 0x1100a000 0 0x1000>; - interrupts = ; - clocks = <&topckgen CLK_TOP_SYSPLL3_D2>, - <&topckgen CLK_TOP_SPI_SEL>, - <&pericfg CLK_PERI_SPI0>; - clock-names = "parent-clk", "sel-clk", "spi-clk"; - cs-gpios = <&pio 105 GPIO_ACTIVE_LOW>, <&pio 72 GPIO_ACTIVE_LOW>; - mediatek,pad-select = <1>, <0>; -}; -- GitLab From ccbc5d0a92c5f1d42d1a1635b53f1987821e9cdd Mon Sep 17 00:00:00 2001 From: Leilk Liu Date: Tue, 25 Jan 2022 09:23:30 +0800 Subject: [PATCH 0288/1586] spi: Add compatible for Mediatek MT8186 This commit adds dt-binding documentation of spi bus for Mediatek MT8186 SoC Platform. Signed-off-by: Leilk Liu Acked-by: Rob Herring Link: https://lore.kernel.org/r/20220125012330.13449-4-leilk.liu@mediatek.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml b/Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml index ea977fba49a77..bfa44acb1bdd0 100644 --- a/Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml +++ b/Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml @@ -26,6 +26,7 @@ properties: - items: - enum: - mediatek,mt6779-spi + - mediatek,mt8186-spi - mediatek,mt8192-spi - mediatek,mt8195-spi - const: mediatek,mt6765-spi -- GitLab From 833026ad56f76d1a1035d6511fd5aeed308465fd Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 25 Jan 2022 09:52:02 +0300 Subject: [PATCH 0289/1586] spi: spidev: prevent spidev->speed_hz from being zero A zero value for spi->max_speed_hz or spidev->speed_hz does not make sense and trying to set that can lead to divide by zero crashes in some of the drivers.
drivers/spi/spi-s3c64xx.c:874 s3c64xx_spi_setup() error: potential divide by zero bug '/ spi->max_speed_hz'. drivers/spi/spi-fsl-dspi.c:613 hz_to_spi_baud() error: potential divide by zero bug '/ speed_hz'. drivers/spi/spi-xlp.c:146 xlp_spi_setup() error: potential divide by zero bug '/ (spi->max_speed_hz)'. drivers/spi/spi-orion.c:162 orion_spi_baudrate_set() error: potential divide by zero bug '/ speed'. Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20220125065202.GA8807@kili Signed-off-by: Mark Brown --- drivers/spi/spidev.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c index a5cceca8b82b6..dd824db63fbe6 100644 --- a/drivers/spi/spidev.c +++ b/drivers/spi/spidev.c @@ -453,22 +453,29 @@ spidev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) dev_dbg(&spi->dev, "%d bits per word\n", tmp); } break; - case SPI_IOC_WR_MAX_SPEED_HZ: + case SPI_IOC_WR_MAX_SPEED_HZ: { + u32 save; + retval = get_user(tmp, (__u32 __user *)arg); - if (retval == 0) { - u32 save = spi->max_speed_hz; + if (retval) + break; + if (tmp == 0) { + retval = -EINVAL; + break; + } - spi->max_speed_hz = tmp; - retval = spi_setup(spi); - if (retval == 0) { - spidev->speed_hz = tmp; - dev_dbg(&spi->dev, "%d Hz (max)\n", - spidev->speed_hz); - } - spi->max_speed_hz = save; + save = spi->max_speed_hz; + + spi->max_speed_hz = tmp; + retval = spi_setup(spi); + if (retval == 0) { + spidev->speed_hz = tmp; + dev_dbg(&spi->dev, "%d Hz (max)\n", spidev->speed_hz); } - break; + spi->max_speed_hz = save; + break; + } default: /* segmented and/or full-duplex I/O request */ /* Check message and copy into scratch area */ -- GitLab From 47e8fe57a66f72c5734b981b21557c732b9a5eb6 Mon Sep 17 00:00:00 2001 From: Li-hao Kuo Date: Mon, 7 Feb 2022 09:57:22 +0800 Subject: [PATCH 0290/1586] spi: Modify irq request position and modify parameters - Change irq request position to the back. 
- Add temporary variable and setting (as suggested by Mr. Andy Shevchenko) Fixes: f62ca4e2a863 ("spi: Add spi driver for Sunplus SP7021") Signed-off-by: Li-hao Kuo Link: https://lore.kernel.org/r/a94e3b123773fe303221d2bd2e4ce36ffa905a1c.1644198957.git.lhjeff911@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-sunplus-sp7021.c | 63 ++++++++++++++++---------------- 1 file changed, 31 insertions(+), 32 deletions(-) diff --git a/drivers/spi/spi-sunplus-sp7021.c b/drivers/spi/spi-sunplus-sp7021.c index e5bdeb3eba458..ba5ed9f7277a3 100644 --- a/drivers/spi/spi-sunplus-sp7021.c +++ b/drivers/spi/spi-sunplus-sp7021.c @@ -110,7 +110,8 @@ static irqreturn_t sp7021_spi_slave_irq(int irq, void *dev) unsigned int data_status; data_status = readl(pspim->s_base + SP7021_DATA_RDY_REG); - writel(data_status | SP7021_SLAVE_CLR_INT, pspim->s_base + SP7021_DATA_RDY_REG); + data_status |= SP7021_SLAVE_CLR_INT; + writel(data_status , pspim->s_base + SP7021_DATA_RDY_REG); complete(&pspim->slave_isr); return IRQ_HANDLED; } @@ -127,14 +128,16 @@ static int sp7021_spi_slave_abort(struct spi_controller *ctlr) static int sp7021_spi_slave_tx(struct spi_device *spi, struct spi_transfer *xfer) { struct sp7021_spi_ctlr *pspim = spi_controller_get_devdata(spi->controller); + u32 value; reinit_completion(&pspim->slave_isr); - writel(SP7021_SLAVE_DMA_EN | SP7021_SLAVE_DMA_RW | FIELD_PREP(SP7021_SLAVE_DMA_CMD, 3), - pspim->s_base + SP7021_SLAVE_DMA_CTRL_REG); + value = SP7021_SLAVE_DMA_EN | SP7021_SLAVE_DMA_RW | FIELD_PREP(SP7021_SLAVE_DMA_CMD, 3); + writel(value, pspim->s_base + SP7021_SLAVE_DMA_CTRL_REG); writel(xfer->len, pspim->s_base + SP7021_SLAVE_DMA_LENGTH_REG); writel(xfer->tx_dma, pspim->s_base + SP7021_SLAVE_DMA_ADDR_REG); - writel(readl(pspim->s_base + SP7021_DATA_RDY_REG) | SP7021_SLAVE_DATA_RDY, - pspim->s_base + SP7021_DATA_RDY_REG); + value = readl(pspim->s_base + SP7021_DATA_RDY_REG); + value |= SP7021_SLAVE_DATA_RDY; + writel(value, pspim->s_base + SP7021_DATA_RDY_REG); if
(wait_for_completion_interruptible(&pspim->isr_done)) { dev_err(&spi->dev, "%s() wait_for_completion err\n", __func__); return -EINTR; @@ -145,11 +148,11 @@ static int sp7021_spi_slave_tx(struct spi_device *spi, struct spi_transfer *xfer static int sp7021_spi_slave_rx(struct spi_device *spi, struct spi_transfer *xfer) { struct sp7021_spi_ctlr *pspim = spi_controller_get_devdata(spi->controller); - int ret = 0; + u32 value; reinit_completion(&pspim->isr_done); - writel(SP7021_SLAVE_DMA_EN | FIELD_PREP(SP7021_SLAVE_DMA_CMD, 3), - pspim->s_base + SP7021_SLAVE_DMA_CTRL_REG); + value = SP7021_SLAVE_DMA_EN | FIELD_PREP(SP7021_SLAVE_DMA_CMD, 3); + writel(value, pspim->s_base + SP7021_SLAVE_DMA_CTRL_REG); writel(xfer->len, pspim->s_base + SP7021_SLAVE_DMA_LENGTH_REG); writel(xfer->rx_dma, pspim->s_base + SP7021_SLAVE_DMA_ADDR_REG); if (wait_for_completion_interruptible(&pspim->isr_done)) { @@ -157,7 +160,7 @@ static int sp7021_spi_slave_rx(struct spi_device *spi, struct spi_transfer *xfer return -EINTR; } writel(SP7021_SLAVE_SW_RST, pspim->s_base + SP7021_SLAVE_DMA_CTRL_REG); - return ret; + return 0; } static void sp7021_spi_master_rb(struct sp7021_spi_ctlr *pspim, unsigned int len) @@ -188,7 +191,6 @@ static irqreturn_t sp7021_spi_master_irq(int irq, void *dev) unsigned int tx_cnt, total_len; unsigned int tx_len, rx_cnt; unsigned int fd_status; - unsigned long flags; bool isrdone = false; u32 value; @@ -203,7 +205,7 @@ static irqreturn_t sp7021_spi_master_irq(int irq, void *dev) if (tx_len == 0 && total_len == 0) return IRQ_NONE; - spin_lock_irqsave(&pspim->lock, flags); + spin_lock_irq(&pspim->lock); rx_cnt = FIELD_GET(SP7021_RX_CNT_MASK, fd_status); if (fd_status & SP7021_RX_FULL_FLAG) @@ -243,7 +245,7 @@ static irqreturn_t sp7021_spi_master_irq(int irq, void *dev) if (isrdone) complete(&pspim->isr_done); - spin_unlock_irqrestore(&pspim->lock, flags); + spin_unlock_irq(&pspim->lock); return IRQ_HANDLED; } @@ -296,11 +298,10 @@ static void sp7021_spi_setup_clk(struct 
spi_controller *ctlr, struct spi_transfe u32 clk_rate, clk_sel, div; clk_rate = clk_get_rate(pspim->spi_clk); - div = clk_rate / xfer->speed_hz; - if (div < 2) - div = 2; + div = max(2U, clk_rate / xfer->speed_hz); + clk_sel = (div / 2) - 1; - pspim->xfer_conf &= SP7021_CLK_MASK; + pspim->xfer_conf &= ~SP7021_CLK_MASK; pspim->xfer_conf |= FIELD_PREP(SP7021_CLK_MASK, clk_sel); writel(pspim->xfer_conf, pspim->m_base + SP7021_SPI_CONFIG_REG); } @@ -313,7 +314,6 @@ static int sp7021_spi_master_transfer_one(struct spi_controller *ctlr, struct sp unsigned int xfer_cnt, xfer_len, last_len; unsigned int i, len_temp; u32 reg_temp; - int ret; xfer_cnt = xfer->len / SP7021_SPI_DATA_SIZE; last_len = xfer->len % SP7021_SPI_DATA_SIZE; @@ -366,9 +366,8 @@ static int sp7021_spi_master_transfer_one(struct spi_controller *ctlr, struct sp writel(pspim->xfer_conf, pspim->m_base + SP7021_SPI_CONFIG_REG); mutex_unlock(&pspim->buf_lock); - ret = 0; } - return ret; + return 0; } static int sp7021_spi_slave_transfer_one(struct spi_controller *ctlr, struct spi_device *spi, @@ -376,12 +375,12 @@ static int sp7021_spi_slave_transfer_one(struct spi_controller *ctlr, struct spi { struct sp7021_spi_ctlr *pspim = spi_master_get_devdata(ctlr); struct device *dev = pspim->dev; - int mode, ret = 0; + int mode, ret; mode = SP7021_SPI_IDLE; if (xfer->tx_buf && xfer->rx_buf) { dev_dbg(&ctlr->dev, "%s() wrong command\n", __func__); - ret = -EINVAL; + return -EINVAL; } else if (xfer->tx_buf) { xfer->tx_dma = dma_map_single(dev, (void *)xfer->tx_buf, xfer->len, DMA_TO_DEVICE); @@ -445,7 +444,7 @@ static int sp7021_spi_controller_probe(struct platform_device *pdev) ctlr = devm_spi_alloc_master(dev, sizeof(*pspim)); if (!ctlr) return -ENOMEM; - device_set_node(&ctlr->dev, pdev->dev.fwnode); + device_set_node(&ctlr->dev, dev_fwnode(dev)); ctlr->bus_num = pdev->id; ctlr->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LSB_FIRST; ctlr->auto_runtime_pm = true; @@ -488,16 +487,6 @@ static int 
sp7021_spi_controller_probe(struct platform_device *pdev) if (pspim->s_irq < 0) return pspim->s_irq; - ret = devm_request_irq(dev, pspim->m_irq, sp7021_spi_master_irq, - IRQF_TRIGGER_RISING, pdev->name, pspim); - if (ret) - return ret; - - ret = devm_request_irq(dev, pspim->s_irq, sp7021_spi_slave_irq, - IRQF_TRIGGER_RISING, pdev->name, pspim); - if (ret) - return ret; - pspim->spi_clk = devm_clk_get(dev, NULL); if (IS_ERR(pspim->spi_clk)) return dev_err_probe(dev, PTR_ERR(pspim->spi_clk), "clk get fail\n"); @@ -522,6 +511,16 @@ static int sp7021_spi_controller_probe(struct platform_device *pdev) if (ret) return ret; + ret = devm_request_irq(dev, pspim->m_irq, sp7021_spi_master_irq, + IRQF_TRIGGER_RISING, pdev->name, pspim); + if (ret) + return ret; + + ret = devm_request_irq(dev, pspim->s_irq, sp7021_spi_slave_irq, + IRQF_TRIGGER_RISING, pdev->name, pspim); + if (ret) + return ret; + pm_runtime_enable(dev); ret = spi_register_controller(ctlr); if (ret) { -- GitLab From d08de0259dfe172caf073b921c6b27ff089605a9 Mon Sep 17 00:00:00 2001 From: Luiz Angelo Daros de Luca Date: Sat, 29 Jan 2022 01:04:53 -0300 Subject: [PATCH 0291/1586] spi: ath79: add mem_ops for fast-read Reading from memory is 3x faster than bit-bang read operation. Also, for tl-wr2543nd, the bit-bang read was sporadically returning random data, possibly a HW defect, while fast-read works as expected. 
Signed-off-by: Luiz Angelo Daros de Luca Link: https://lore.kernel.org/r/20220129040453.8476-1-luizluca@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-ath79.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/drivers/spi/spi-ath79.c b/drivers/spi/spi-ath79.c index d1e287d2d9cdc..607e7a49fb89f 100644 --- a/drivers/spi/spi-ath79.c +++ b/drivers/spi/spi-ath79.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -133,6 +134,38 @@ static u32 ath79_spi_txrx_mode0(struct spi_device *spi, unsigned int nsecs, return ath79_spi_rr(sp, AR71XX_SPI_REG_RDS); } +static int ath79_exec_mem_op(struct spi_mem *mem, + const struct spi_mem_op *op) +{ + struct ath79_spi *sp = ath79_spidev_to_sp(mem->spi); + + /* Ensures that reading is performed on device connected to hardware cs0 */ + if (mem->spi->chip_select || mem->spi->cs_gpiod) + return -ENOTSUPP; + + /* Only use for fast-read op. */ + if (op->cmd.opcode != 0x0b || op->data.dir != SPI_MEM_DATA_IN || + op->addr.nbytes != 3 || op->dummy.nbytes != 1) + return -ENOTSUPP; + + /* disable GPIO mode */ + ath79_spi_wr(sp, AR71XX_SPI_REG_FS, 0); + + memcpy_fromio(op->data.buf.in, sp->base + op->addr.val, op->data.nbytes); + + /* enable GPIO mode */ + ath79_spi_wr(sp, AR71XX_SPI_REG_FS, AR71XX_SPI_FS_GPIO); + + /* restore IOC register */ + ath79_spi_wr(sp, AR71XX_SPI_REG_IOC, sp->ioc_base); + + return 0; +} + +static const struct spi_controller_mem_ops ath79_mem_ops = { + .exec_op = ath79_exec_mem_op, +}; + static int ath79_spi_probe(struct platform_device *pdev) { struct spi_master *master; @@ -154,6 +187,7 @@ static int ath79_spi_probe(struct platform_device *pdev) master->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 32); master->flags = SPI_MASTER_GPIO_SS; master->num_chipselect = 3; + master->mem_ops = &ath79_mem_ops; sp->bitbang.master = master; sp->bitbang.chipselect = ath79_spi_chipselect; -- GitLab From 4f92724d4b92c024e721063f520d66e11ca4b54b Mon Sep 17 
00:00:00 2001 From: Miaoqian Lin Date: Fri, 28 Jan 2022 16:52:38 +0000 Subject: [PATCH 0292/1586] spi: tegra114: Add missing IRQ check in tegra_spi_probe This function does not check the return value of platform_get_irq() and may pass a negative error code to request_threaded_irq(), which takes an unsigned IRQ number, causing it to fail with -EINVAL and override the original error code. Stop calling request_threaded_irq() with invalid IRQ #s. Fixes: f333a331adfa ("spi/tegra114: add spi driver") Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20220128165238.25615-1-linmq006@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-tegra114.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/spi/spi-tegra114.c b/drivers/spi/spi-tegra114.c index e9de1d958bbd2..8f345247a8c32 100644 --- a/drivers/spi/spi-tegra114.c +++ b/drivers/spi/spi-tegra114.c @@ -1352,6 +1352,10 @@ static int tegra_spi_probe(struct platform_device *pdev) tspi->phys = r->start; spi_irq = platform_get_irq(pdev, 0); + if (spi_irq < 0) { + ret = spi_irq; + goto exit_free_master; + } tspi->irq = spi_irq; tspi->clk = devm_clk_get(&pdev->dev, "spi"); -- GitLab From 3b8ab4da3405d07ddfe434f17f9014740b30a19c Mon Sep 17 00:00:00 2001 From: Li-hao Kuo Date: Mon, 7 Feb 2022 09:46:34 +0800 Subject: [PATCH 0293/1586] spi: Fix test error for sp7021.
Remove the include path and modify parameters to fix an error in the dt-binding test Fixes: a708078eeb99 ("spi: Add Sunplus SP7021 schema") Signed-off-by: Li-hao Kuo Link: https://lore.kernel.org/r/b8a94fbfcab68b1279b09b6297099310c209927b.1644198244.git.lhjeff911@gmail.com Signed-off-by: Mark Brown --- .../devicetree/bindings/spi/spi-sunplus-sp7021.yaml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml b/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml index 38589fdbc80db..298eac28c40ff 100644 --- a/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml +++ b/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml @@ -59,8 +59,6 @@ unevaluatedProperties: false examples: - | - #include - #include #include spi@9C002D80 { compatible = "sunplus,sp7021-spi"; @@ -73,8 +71,8 @@ examples: interrupts = <144 IRQ_TYPE_LEVEL_HIGH>, <146 IRQ_TYPE_LEVEL_HIGH>, <145 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&clkc SPI_COMBO_0>; - resets = <&rstc RST_SPI_COMBO_0>; + clocks = <&clkc 0x32>; + resets = <&rstc 0x22>; pinctrl-names = "default"; pinctrl-0 = <&pins_spi0>; }; -- GitLab From 47c3e06ed95aa9b74932dbc6b23b544f644faf84 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Fri, 28 Jan 2022 16:59:56 +0000 Subject: [PATCH 0294/1586] spi: tegra210-quad: Fix missin IRQ check in tegra_qspi_probe This function does not check the return value of platform_get_irq() and may pass a negative error code to request_threaded_irq(), which takes an unsigned IRQ number, causing it to fail with -EINVAL and override the original error code. Stop calling request_threaded_irq() with invalid IRQ #s.
Fixes: 921fc1838fb0 ("spi: tegra210-quad: Add support for Tegra210 QSPI controller") Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20220128165956.27821-1-linmq006@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-tegra210-quad.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c index ce1bdb4767ea3..cb00ac2fc7d8e 100644 --- a/drivers/spi/spi-tegra210-quad.c +++ b/drivers/spi/spi-tegra210-quad.c @@ -1240,6 +1240,8 @@ static int tegra_qspi_probe(struct platform_device *pdev) tqspi->phys = r->start; qspi_irq = platform_get_irq(pdev, 0); + if (qspi_irq < 0) + return qspi_irq; tqspi->irq = qspi_irq; tqspi->clk = devm_clk_get(&pdev->dev, "qspi"); -- GitLab From 3301bc53358a0eb0a0db65fd7a513cd4cb50c83a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 10 Jan 2022 15:29:45 +0800 Subject: [PATCH 0295/1586] lib/sbitmap: kill 'depth' from sbitmap_word Only the last sbitmap_word can have different depth, and all the others must have same depth of 1U << sb->shift, so not necessary to store it in sbitmap_word, and it can be retrieved easily and efficiently by adding one internal helper of __map_depth(sb, index). Remove 'depth' field from sbitmap_word, then the annotation of ____cacheline_aligned_in_smp for 'word' isn't needed any more. Not see performance effect when running high parallel IOPS test on null_blk. This way saves us one cacheline(usually 64 words) per each sbitmap_word. 
Cc: Martin Wilck Signed-off-by: Ming Lei Reviewed-by: Martin Wilck Reviewed-by: John Garry Link: https://lore.kernel.org/r/20220110072945.347535-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 17 ++++++++++------- lib/sbitmap.c | 34 ++++++++++++++-------------------- 2 files changed, 24 insertions(+), 27 deletions(-) diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 95df357ec0093..df3b584b0f0cb 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -27,15 +27,10 @@ struct seq_file; * struct sbitmap_word - Word in a &struct sbitmap. */ struct sbitmap_word { - /** - * @depth: Number of bits being used in @word/@cleared - */ - unsigned long depth; - /** * @word: word holding free bits */ - unsigned long word ____cacheline_aligned_in_smp; + unsigned long word; /** * @cleared: word holding cleared bits @@ -164,6 +159,14 @@ struct sbitmap_queue { int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, gfp_t flags, int node, bool round_robin, bool alloc_hint); +/* sbitmap internal helper */ +static inline unsigned int __map_depth(const struct sbitmap *sb, int index) +{ + if (index == sb->map_nr - 1) + return sb->depth - (index << sb->shift); + return 1U << sb->shift; +} + /** * sbitmap_free() - Free memory used by a &struct sbitmap. * @sb: Bitmap to free. 
@@ -251,7 +254,7 @@ static inline void __sbitmap_for_each_set(struct sbitmap *sb, while (scanned < sb->depth) { unsigned long word; unsigned int depth = min_t(unsigned int, - sb->map[index].depth - nr, + __map_depth(sb, index) - nr, sb->depth - scanned); scanned += depth; diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 09d293c30fd2d..b7cb96ae4701b 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -85,7 +85,6 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, bool alloc_hint) { unsigned int bits_per_word; - unsigned int i; if (shift < 0) shift = sbitmap_calculate_shift(depth); @@ -117,10 +116,6 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, return -ENOMEM; } - for (i = 0; i < sb->map_nr; i++) { - sb->map[i].depth = min(depth, bits_per_word); - depth -= sb->map[i].depth; - } return 0; } EXPORT_SYMBOL_GPL(sbitmap_init_node); @@ -135,11 +130,6 @@ void sbitmap_resize(struct sbitmap *sb, unsigned int depth) sb->depth = depth; sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); - - for (i = 0; i < sb->map_nr; i++) { - sb->map[i].depth = min(depth, bits_per_word); - depth -= sb->map[i].depth; - } } EXPORT_SYMBOL_GPL(sbitmap_resize); @@ -184,8 +174,8 @@ static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index, int nr; do { - nr = __sbitmap_get_word(&map->word, map->depth, alloc_hint, - !sb->round_robin); + nr = __sbitmap_get_word(&map->word, __map_depth(sb, index), + alloc_hint, !sb->round_robin); if (nr != -1) break; if (!sbitmap_deferred_clear(map)) @@ -257,7 +247,9 @@ static int __sbitmap_get_shallow(struct sbitmap *sb, for (i = 0; i < sb->map_nr; i++) { again: nr = __sbitmap_get_word(&sb->map[index].word, - min(sb->map[index].depth, shallow_depth), + min_t(unsigned int, + __map_depth(sb, index), + shallow_depth), SB_NR_TO_BIT(sb, alloc_hint), true); if (nr != -1) { nr += index << sb->shift; @@ -315,11 +307,12 @@ static unsigned int __sbitmap_weight(const struct sbitmap *sb, bool set) for (i = 0; i < 
sb->map_nr; i++) { const struct sbitmap_word *word = &sb->map[i]; + unsigned int word_depth = __map_depth(sb, i); if (set) - weight += bitmap_weight(&word->word, word->depth); + weight += bitmap_weight(&word->word, word_depth); else - weight += bitmap_weight(&word->cleared, word->depth); + weight += bitmap_weight(&word->cleared, word_depth); } return weight; } @@ -367,7 +360,7 @@ void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m) for (i = 0; i < sb->map_nr; i++) { unsigned long word = READ_ONCE(sb->map[i].word); unsigned long cleared = READ_ONCE(sb->map[i].cleared); - unsigned int word_bits = READ_ONCE(sb->map[i].depth); + unsigned int word_bits = __map_depth(sb, i); word &= ~cleared; @@ -531,15 +524,16 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, for (i = 0; i < sb->map_nr; i++) { struct sbitmap_word *map = &sb->map[index]; unsigned long get_mask; + unsigned int map_depth = __map_depth(sb, index); sbitmap_deferred_clear(map); - if (map->word == (1UL << (map->depth - 1)) - 1) + if (map->word == (1UL << (map_depth - 1)) - 1) continue; - nr = find_first_zero_bit(&map->word, map->depth); - if (nr + nr_tags <= map->depth) { + nr = find_first_zero_bit(&map->word, map_depth); + if (nr + nr_tags <= map_depth) { atomic_long_t *ptr = (atomic_long_t *) &map->word; - int map_tags = min_t(int, nr_tags, map->depth); + int map_tags = min_t(int, nr_tags, map_depth); unsigned long val, ret; get_mask = ((1UL << map_tags) - 1) << nr; -- GitLab From 3f607293b74d6acb06571a774a500143c1f0ed2c Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 8 Feb 2022 20:07:04 +0800 Subject: [PATCH 0296/1586] sbitmap: Delete old sbitmap_queue_get_shallow() Since __sbitmap_queue_get_shallow() was introduced in commit c05e66733788 ("sbitmap: add sbitmap_get_shallow() operation"), it has not been used. 
Delete __sbitmap_queue_get_shallow() and rename public __sbitmap_queue_get_shallow() -> sbitmap_queue_get_shallow() as it is odd to have public __foo but no foo at all. Signed-off-by: John Garry Link: https://lore.kernel.org/r/1644322024-105340-1-git-send-email-john.garry@huawei.com Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 2 +- include/linux/sbitmap.h | 34 ++++------------------------------ lib/sbitmap.c | 6 +++--- 3 files changed, 8 insertions(+), 34 deletions(-) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 845f74e8dd7bb..0fd409b8e86ef 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -107,7 +107,7 @@ static int __blk_mq_get_tag(struct blk_mq_alloc_data *data, return BLK_MQ_NO_TAG; if (data->shallow_depth) - return __sbitmap_queue_get_shallow(bt, data->shallow_depth); + return sbitmap_queue_get_shallow(bt, data->shallow_depth); else return __sbitmap_queue_get(bt); } diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index df3b584b0f0cb..dffeb8281c2d9 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -135,7 +135,7 @@ struct sbitmap_queue { /** * @min_shallow_depth: The minimum shallow depth which may be passed to - * sbitmap_queue_get_shallow() or __sbitmap_queue_get_shallow(). + * sbitmap_queue_get_shallow() */ unsigned int min_shallow_depth; }; @@ -463,7 +463,7 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, unsigned int *offset); /** - * __sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct + * sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct * sbitmap_queue, limiting the depth used from each word, with preemption * already disabled. * @sbq: Bitmap queue to allocate from. @@ -475,8 +475,8 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, * * Return: Non-negative allocated bit number if successful, -1 otherwise. 
*/ -int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, - unsigned int shallow_depth); +int sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, + unsigned int shallow_depth); /** * sbitmap_queue_get() - Try to allocate a free bit from a &struct @@ -498,32 +498,6 @@ static inline int sbitmap_queue_get(struct sbitmap_queue *sbq, return nr; } -/** - * sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct - * sbitmap_queue, limiting the depth used from each word. - * @sbq: Bitmap queue to allocate from. - * @cpu: Output parameter; will contain the CPU we ran on (e.g., to be passed to - * sbitmap_queue_clear()). - * @shallow_depth: The maximum number of bits to allocate from a single word. - * See sbitmap_get_shallow(). - * - * If you call this, make sure to call sbitmap_queue_min_shallow_depth() after - * initializing @sbq. - * - * Return: Non-negative allocated bit number if successful, -1 otherwise. - */ -static inline int sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, - unsigned int *cpu, - unsigned int shallow_depth) -{ - int nr; - - *cpu = get_cpu(); - nr = __sbitmap_queue_get_shallow(sbq, shallow_depth); - put_cpu(); - return nr; -} - /** * sbitmap_queue_min_shallow_depth() - Inform a &struct sbitmap_queue of the * minimum shallow depth that will be used. 
diff --git a/lib/sbitmap.c b/lib/sbitmap.c index b7cb96ae4701b..2eb3de18ded3e 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -557,14 +557,14 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, return 0; } -int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, - unsigned int shallow_depth) +int sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, + unsigned int shallow_depth) { WARN_ON_ONCE(shallow_depth < sbq->min_shallow_depth); return sbitmap_get_shallow(&sbq->sb, shallow_depth); } -EXPORT_SYMBOL_GPL(__sbitmap_queue_get_shallow); +EXPORT_SYMBOL_GPL(sbitmap_queue_get_shallow); void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq, unsigned int min_shallow_depth) -- GitLab From adbb8a1edecda677c5f031b44da25680a08a163e Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Fri, 24 Dec 2021 16:13:31 +0000 Subject: [PATCH 0297/1586] perf/arm-ccn: Use platform_get_irq() to get the interrupt platform_get_resource(pdev, IORESOURCE_IRQ, ..) relies on static allocation of IRQ resources in DT core code, this causes an issue when using hierarchical interrupt domains using "interrupts" property in the node as this bypasses the hierarchical setup and messes up the irq chaining. In preparation for removal of static setup of IRQ resource from DT core code use platform_get_irq(). 
Link: https://lore.kernel.org/r/20211224161334.31123-1-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Lad Prabhakar Signed-off-by: Will Deacon --- drivers/perf/arm-ccn.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c index a96c316045459..40b352e8aa7f7 100644 --- a/drivers/perf/arm-ccn.c +++ b/drivers/perf/arm-ccn.c @@ -1460,8 +1460,7 @@ static irqreturn_t arm_ccn_irq_handler(int irq, void *dev_id) static int arm_ccn_probe(struct platform_device *pdev) { struct arm_ccn *ccn; - struct resource *res; - unsigned int irq; + int irq; int err; ccn = devm_kzalloc(&pdev->dev, sizeof(*ccn), GFP_KERNEL); @@ -1474,10 +1473,9 @@ static int arm_ccn_probe(struct platform_device *pdev) if (IS_ERR(ccn->base)) return PTR_ERR(ccn->base); - res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); - if (!res) - return -EINVAL; - irq = res->start; + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; /* Check if we can use the interrupt */ writel(CCN_MN_ERRINT_STATUS__PMU_EVENTS__DISABLE, -- GitLab From e564518b0727c8960942d8b1452703bbabf1a5ec Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 12 Jan 2022 15:00:47 +0100 Subject: [PATCH 0298/1586] perf: MARVELL_CN10K_TAD_PMU should depend on ARCH_THUNDER The Marvell CN10K Last-Level cache Tag-and-data Units (LLC-TAD) performance monitor is only present on Marvell CN10K SoCs. Hence add a dependency on ARCH_THUNDER, to prevent asking the user about this driver when configuring a kernel without Cavium Thunder (incl. Marvell CN10K) SoC support. 
Fixes: 036a7584bede ("drivers: perf: Add LLC-TAD perf counter support") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/b4662a2c767d04cca19417e0c845edea2da262ad.1641995941.git.geert+renesas@glider.be Signed-off-by: Will Deacon --- drivers/perf/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index e1a0c44bc6864..7d6ffdf44a415 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -141,7 +141,7 @@ config ARM_DMC620_PMU config MARVELL_CN10K_TAD_PMU tristate "Marvell CN10K LLC-TAD PMU" - depends on ARM64 || (COMPILE_TEST && 64BIT) + depends on ARCH_THUNDER || (COMPILE_TEST && 64BIT) help Provides support for Last-Level cache Tag-and-data Units (LLC-TAD) performance monitors on CN10K family silicons. -- GitLab From 6f75217b20a768c72fb8bb999e25a95673fe0174 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 3 Feb 2022 18:01:18 +0000 Subject: [PATCH 0299/1586] perf/arm-cmn: Make arm_cmn_debugfs static Indeed our debugfs directory is driver-internal so should be static. 
Link: https://lore.kernel.org/r/202202030812.II1K2ZXf-lkp@intel.com Reported-by: kernel test robot Signed-off-by: Robin Murphy Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/ca9248caaae69b5134f69e085fe78905dfe74378.1643911278.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 0e48adce57ef3..d0e1ce2c83bab 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -353,7 +353,7 @@ static struct arm_cmn_node *arm_cmn_node(const struct arm_cmn *cmn, return NULL; } -struct dentry *arm_cmn_debugfs; +static struct dentry *arm_cmn_debugfs; #ifdef CONFIG_DEBUG_FS static const char *arm_cmn_device_type(u8 type) -- GitLab From 8c0c56879d067ad1e68c0aeeecc7d5f57bac3ffd Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 8 Feb 2022 15:12:28 +0000 Subject: [PATCH 0300/1586] perf/marvell_cn10k: Fix unused variable warning when W=1 and CONFIG_OF=n The kbuild helpfully reports that the Marvell CN10K TAD PMU driver emits a warning when building with W=1 and CONFIG_OF=n: | >> drivers/perf/marvell_cn10k_tad_pmu.c:371:34: warning: unused variable 'tad_pmu_of_match' [-Wunused-const-variable] static const struct of_device_id tad_pmu_of_match[] = { Guard the match table with CONFIG_OF to squash the warning. 
Link: https://lore.kernel.org/r/202201292349.zRQLcDDD-lkp@intel.com Reported-by: kernel test robot Signed-off-by: Will Deacon --- drivers/perf/marvell_cn10k_tad_pmu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/perf/marvell_cn10k_tad_pmu.c b/drivers/perf/marvell_cn10k_tad_pmu.c index 7f4d292658e32..ee67305f822d0 100644 --- a/drivers/perf/marvell_cn10k_tad_pmu.c +++ b/drivers/perf/marvell_cn10k_tad_pmu.c @@ -368,10 +368,12 @@ static int tad_pmu_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_OF static const struct of_device_id tad_pmu_of_match[] = { { .compatible = "marvell,cn10k-tad-pmu", }, {}, }; +#endif static struct platform_driver tad_pmu_driver = { .driver = { -- GitLab From 602c873eb52e5717057eb0971258c8eb7440c4a5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 1 Feb 2022 20:10:01 +0100 Subject: [PATCH 0301/1586] perf: Replace acpi_bus_get_device() Replace acpi_bus_get_device() that is going to be dropped with acpi_fetch_acpi_dev(). No intentional functional impact. Signed-off-by: Rafael J. 
Wysocki Acked-by: Will Deacon Link: https://lore.kernel.org/r/10025610.nUPlyArG6x@kreacher Signed-off-by: Will Deacon --- drivers/perf/thunderx2_pmu.c | 6 ++---- drivers/perf/xgene_pmu.c | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c index 05378c0fd8f32..1edb9c03704fd 100644 --- a/drivers/perf/thunderx2_pmu.c +++ b/drivers/perf/thunderx2_pmu.c @@ -887,13 +887,11 @@ static struct tx2_uncore_pmu *tx2_uncore_pmu_init_dev(struct device *dev, static acpi_status tx2_uncore_pmu_add(acpi_handle handle, u32 level, void *data, void **return_value) { + struct acpi_device *adev = acpi_fetch_acpi_dev(handle); struct tx2_uncore_pmu *tx2_pmu; - struct acpi_device *adev; enum tx2_uncore_type type; - if (acpi_bus_get_device(handle, &adev)) - return AE_OK; - if (acpi_bus_get_status(adev) || !adev->status.present) + if (!adev || acpi_bus_get_status(adev) || !adev->status.present) return AE_OK; type = get_tx2_pmu_type(adev); diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c index 2b6d476bd2137..5283608dc055b 100644 --- a/drivers/perf/xgene_pmu.c +++ b/drivers/perf/xgene_pmu.c @@ -1549,14 +1549,12 @@ static const struct acpi_device_id *xgene_pmu_acpi_match_type( static acpi_status acpi_pmu_dev_add(acpi_handle handle, u32 level, void *data, void **return_value) { + struct acpi_device *adev = acpi_fetch_acpi_dev(handle); const struct acpi_device_id *acpi_id; struct xgene_pmu *xgene_pmu = data; struct xgene_pmu_dev_ctx *ctx; - struct acpi_device *adev; - if (acpi_bus_get_device(handle, &adev)) - return AE_OK; - if (acpi_bus_get_status(adev) || !adev->status.present) + if (!adev || acpi_bus_get_status(adev) || !adev->status.present) return AE_OK; acpi_id = xgene_pmu_acpi_match_type(xgene_pmu_acpi_type_match, adev); -- GitLab From 82980b1622d97017053c6792382469d7dc26a486 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 16 Feb 2021 15:04:34 +0000 Subject: [PATCH 0302/1586] rcu: Kill 
rnp->ofl_seq and use only rcu_state.ofl_lock for exclusion If we allow architectures to bring APs online in parallel, then we end up requiring rcu_cpu_starting() to be reentrant. But currently, the manipulation of rnp->ofl_seq is not thread-safe. However, rnp->ofl_seq is also fairly much pointless anyway since both rcu_cpu_starting() and rcu_report_dead() hold rcu_state.ofl_lock for fairly much the whole time that rnp->ofl_seq is set to an odd number to indicate that an operation is in progress. So drop rnp->ofl_seq completely, and use only rcu_state.ofl_lock. This has a couple of minor complexities: lockdep will complain when we take rcu_state.ofl_lock, and currently accepts the 'excuse' of having an odd value in rnp->ofl_seq. So switch it to an arch_spinlock_t to avoid that false positive complaint. Since we're killing rnp->ofl_seq of course that 'excuse' has to be changed too, so make it check for arch_spin_is_locked(rcu_state.ofl_lock). There's no arch_spin_lock_irqsave() so we have to manually save and restore local interrupts around the locking. At Paul's request based on Neeraj's analysis, make rcu_gp_init not just wait but *exclude* any CPU online/offline activity, which was fairly much true already by virtue of it holding rcu_state.ofl_lock. Signed-off-by: David Woodhouse Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 71 ++++++++++++++++++++++++----------------------- kernel/rcu/tree.h | 4 +-- 2 files changed, 37 insertions(+), 38 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index a4c25a6283b0b..73a4c9d07b865 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -91,7 +91,7 @@ static struct rcu_state rcu_state = { .abbr = RCU_ABBR, .exp_mutex = __MUTEX_INITIALIZER(rcu_state.exp_mutex), .exp_wake_mutex = __MUTEX_INITIALIZER(rcu_state.exp_wake_mutex), - .ofl_lock = __RAW_SPIN_LOCK_UNLOCKED(rcu_state.ofl_lock), + .ofl_lock = __ARCH_SPIN_LOCK_UNLOCKED, }; /* Dump rcu_node combining tree at boot to verify correct setup. 
*/ @@ -1175,7 +1175,15 @@ bool rcu_lockdep_current_cpu_online(void) preempt_disable_notrace(); rdp = this_cpu_ptr(&rcu_data); rnp = rdp->mynode; - if (rdp->grpmask & rcu_rnp_online_cpus(rnp) || READ_ONCE(rnp->ofl_seq) & 0x1) + /* + * Strictly, we care here about the case where the current CPU is + * in rcu_cpu_starting() and thus has an excuse for rdp->grpmask + * not being up to date. So arch_spin_is_locked() might have a + * false positive if it's held by some *other* CPU, but that's + * OK because that just means a false *negative* on the warning. + */ + if (rdp->grpmask & rcu_rnp_online_cpus(rnp) || + arch_spin_is_locked(&rcu_state.ofl_lock)) ret = true; preempt_enable_notrace(); return ret; @@ -1739,7 +1747,6 @@ static void rcu_strict_gp_boundary(void *unused) */ static noinline_for_stack bool rcu_gp_init(void) { - unsigned long firstseq; unsigned long flags; unsigned long oldmask; unsigned long mask; @@ -1782,22 +1789,17 @@ static noinline_for_stack bool rcu_gp_init(void) * of RCU's Requirements documentation. */ WRITE_ONCE(rcu_state.gp_state, RCU_GP_ONOFF); + /* Exclude CPU hotplug operations. */ rcu_for_each_leaf_node(rnp) { - // Wait for CPU-hotplug operations that might have - // started before this grace period did. - smp_mb(); // Pair with barriers used when updating ->ofl_seq to odd values. - firstseq = READ_ONCE(rnp->ofl_seq); - if (firstseq & 0x1) - while (firstseq == READ_ONCE(rnp->ofl_seq)) - schedule_timeout_idle(1); // Can't wake unless RCU is watching. - smp_mb(); // Pair with barriers used when updating ->ofl_seq to even values. - raw_spin_lock(&rcu_state.ofl_lock); - raw_spin_lock_irq_rcu_node(rnp); + local_irq_save(flags); + arch_spin_lock(&rcu_state.ofl_lock); + raw_spin_lock_rcu_node(rnp); if (rnp->qsmaskinit == rnp->qsmaskinitnext && !rnp->wait_blkd_tasks) { /* Nothing to do on this leaf rcu_node structure. 
*/ - raw_spin_unlock_irq_rcu_node(rnp); - raw_spin_unlock(&rcu_state.ofl_lock); + raw_spin_unlock_rcu_node(rnp); + arch_spin_unlock(&rcu_state.ofl_lock); + local_irq_restore(flags); continue; } @@ -1832,8 +1834,9 @@ static noinline_for_stack bool rcu_gp_init(void) rcu_cleanup_dead_rnp(rnp); } - raw_spin_unlock_irq_rcu_node(rnp); - raw_spin_unlock(&rcu_state.ofl_lock); + raw_spin_unlock_rcu_node(rnp); + arch_spin_unlock(&rcu_state.ofl_lock); + local_irq_restore(flags); } rcu_gp_slow(gp_preinit_delay); /* Races with CPU hotplug. */ @@ -4287,11 +4290,10 @@ void rcu_cpu_starting(unsigned int cpu) rnp = rdp->mynode; mask = rdp->grpmask; - WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1); - WARN_ON_ONCE(!(rnp->ofl_seq & 0x1)); + local_irq_save(flags); + arch_spin_lock(&rcu_state.ofl_lock); rcu_dynticks_eqs_online(); - smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier(). - raw_spin_lock_irqsave_rcu_node(rnp, flags); + raw_spin_lock_rcu_node(rnp); WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask); newcpu = !(rnp->expmaskinitnext & mask); rnp->expmaskinitnext |= mask; @@ -4304,15 +4306,18 @@ void rcu_cpu_starting(unsigned int cpu) /* An incoming CPU should never be blocking a grace period. */ if (WARN_ON_ONCE(rnp->qsmask & mask)) { /* RCU waiting on incoming CPU? */ + /* rcu_report_qs_rnp() *really* wants some flags to restore */ + unsigned long flags2; + + local_irq_save(flags2); rcu_disable_urgency_upon_qs(rdp); /* Report QS -after- changing ->qsmaskinitnext! */ - rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags); + rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags2); } else { - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + raw_spin_unlock_rcu_node(rnp); } - smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier(). - WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1); - WARN_ON_ONCE(rnp->ofl_seq & 0x1); + arch_spin_unlock(&rcu_state.ofl_lock); + local_irq_restore(flags); smp_mb(); /* Ensure RCU read-side usage follows above initialization. 
*/ } @@ -4326,7 +4331,7 @@ void rcu_cpu_starting(unsigned int cpu) */ void rcu_report_dead(unsigned int cpu) { - unsigned long flags; + unsigned long flags, seq_flags; unsigned long mask; struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ @@ -4340,10 +4345,8 @@ void rcu_report_dead(unsigned int cpu) /* Remove outgoing CPU from mask in the leaf rcu_node structure. */ mask = rdp->grpmask; - WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1); - WARN_ON_ONCE(!(rnp->ofl_seq & 0x1)); - smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier(). - raw_spin_lock(&rcu_state.ofl_lock); + local_irq_save(seq_flags); + arch_spin_lock(&rcu_state.ofl_lock); raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */ rdp->rcu_ofl_gp_seq = READ_ONCE(rcu_state.gp_seq); rdp->rcu_ofl_gp_flags = READ_ONCE(rcu_state.gp_flags); @@ -4354,10 +4357,8 @@ void rcu_report_dead(unsigned int cpu) } WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext & ~mask); raw_spin_unlock_irqrestore_rcu_node(rnp, flags); - raw_spin_unlock(&rcu_state.ofl_lock); - smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier(). - WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1); - WARN_ON_ONCE(rnp->ofl_seq & 0x1); + arch_spin_unlock(&rcu_state.ofl_lock); + local_irq_restore(seq_flags); rdp->cpu_started = false; } diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 486fc901bd085..4b4bcef8a9743 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -56,8 +56,6 @@ struct rcu_node { /* Initialized from ->qsmaskinitnext at the */ /* beginning of each grace period. */ unsigned long qsmaskinitnext; - unsigned long ofl_seq; /* CPU-hotplug operation sequence count. */ - /* Online CPUs for next grace period. */ unsigned long expmask; /* CPUs or groups that need to check in */ /* to allow the current expedited GP */ /* to complete. */ @@ -355,7 +353,7 @@ struct rcu_state { const char *name; /* Name of structure. 
*/ char abbr; /* Abbreviated name. */ - raw_spinlock_t ofl_lock ____cacheline_internodealigned_in_smp; + arch_spinlock_t ofl_lock ____cacheline_internodealigned_in_smp; /* Synchronize offline with */ /* GP pre-initialization. */ }; -- GitLab From 0cabb47af3cfaeb6007ba3868379bbd4daee64cc Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 10 Dec 2021 16:25:20 -0800 Subject: [PATCH 0303/1586] rcu: Refactor rcu_barrier() empty-list handling This commit saves a few lines by checking first for an empty callback list. If the callback list is empty, then that CPU is taken care of, regardless of its online or nocb state. Also simplify tracing accordingly and fold a few lines together. Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 9 ++++----- kernel/rcu/tree.c | 25 ++++++++----------------- 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 670e41783edd8..90b2fb0292cb1 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -794,16 +794,15 @@ TRACE_EVENT_RCU(rcu_torture_read, * Tracepoint for rcu_barrier() execution. The string "s" describes * the rcu_barrier phase: * "Begin": rcu_barrier() started. + * "CB": An rcu_barrier_callback() invoked a callback, not the last. * "EarlyExit": rcu_barrier() piggybacked, thus early exit. * "Inc1": rcu_barrier() piggyback check counter incremented. - * "OfflineNoCBQ": rcu_barrier() found offline no-CBs CPU with callbacks. - * "OnlineQ": rcu_barrier() found online CPU with callbacks. - * "OnlineNQ": rcu_barrier() found online CPU, no callbacks. + * "Inc2": rcu_barrier() piggyback check counter incremented. * "IRQ": An rcu_barrier_callback() callback posted on remote CPU. * "IRQNQ": An rcu_barrier_callback() callback found no callbacks. - * "CB": An rcu_barrier_callback() invoked a callback, not the last. * "LastCB": An rcu_barrier_callback() invoked the last callback. 
- * "Inc2": rcu_barrier() piggyback check counter incremented. + * "NQ": rcu_barrier() found a CPU with no callbacks. + * "OnlineQ": rcu_barrier() found online CPU with callbacks. * The "cpu" argument is the CPU or -1 if meaningless, the "cnt" argument * is the count of remaining callbacks, and "done" is the piggybacking count. */ diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 73a4c9d07b865..57a7a0065750b 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -4030,8 +4030,7 @@ void rcu_barrier(void) /* Did someone else do our work for us? */ if (rcu_seq_done(&rcu_state.barrier_sequence, s)) { - rcu_barrier_trace(TPS("EarlyExit"), -1, - rcu_state.barrier_sequence); + rcu_barrier_trace(TPS("EarlyExit"), -1, rcu_state.barrier_sequence); smp_mb(); /* caller's subsequent code after above check. */ mutex_unlock(&rcu_state.barrier_mutex); return; @@ -4059,26 +4058,18 @@ void rcu_barrier(void) */ for_each_possible_cpu(cpu) { rdp = per_cpu_ptr(&rcu_data, cpu); - if (cpu_is_offline(cpu) && - !rcu_rdp_is_offloaded(rdp)) + if (!rcu_segcblist_n_cbs(&rdp->cblist)) { + rcu_barrier_trace(TPS("NQ"), cpu, rcu_state.barrier_sequence); continue; - if (rcu_segcblist_n_cbs(&rdp->cblist) && cpu_online(cpu)) { - rcu_barrier_trace(TPS("OnlineQ"), cpu, - rcu_state.barrier_sequence); + } + if (cpu_online(cpu)) { + rcu_barrier_trace(TPS("OnlineQ"), cpu, rcu_state.barrier_sequence); smp_call_function_single(cpu, rcu_barrier_func, (void *)cpu, 1); - } else if (rcu_segcblist_n_cbs(&rdp->cblist) && - cpu_is_offline(cpu)) { - rcu_barrier_trace(TPS("OfflineNoCBQ"), cpu, - rcu_state.barrier_sequence); + } else { + rcu_barrier_trace(TPS("OfflineNoCBQ"), cpu, rcu_state.barrier_sequence); local_irq_disable(); rcu_barrier_func((void *)cpu); local_irq_enable(); - } else if (cpu_is_offline(cpu)) { - rcu_barrier_trace(TPS("OfflineNoCBNoQ"), cpu, - rcu_state.barrier_sequence); - } else { - rcu_barrier_trace(TPS("OnlineNQ"), cpu, - rcu_state.barrier_sequence); } } cpus_read_unlock(); -- GitLab 
From a16578dd5e3a44b53ca0699ac2971679dab97484 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 14 Dec 2021 13:15:18 -0800 Subject: [PATCH 0304/1586] rcu: Rework rcu_barrier() and callback-migration logic This commit reworks rcu_barrier() and callback-migration logic to allow rcu_barrier() to run concurrently with CPU-hotplug operations. The key trick is for callback migration to check to see if an rcu_barrier() is in flight, and, if so, enqueue the ->barrier_head callback on its behalf. This commit adds synchronization with RCU's CPU-hotplug notifiers. Taken together, this will permit a later commit to remove the cpus_read_lock() and cpus_read_unlock() calls from rcu_barrier(). [ paulmck: Updated per kbuild test robot feedback. ] [ paulmck: Updated per review session with Neeraj, Frederic, Uladzislau, and Boqun. ] Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 77 +++++++++++++++++++++++++++++++++++++---------- kernel/rcu/tree.h | 2 ++ 2 files changed, 63 insertions(+), 16 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 57a7a0065750b..004ff1c0d192f 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3987,13 +3987,16 @@ static void rcu_barrier_callback(struct rcu_head *rhp) } /* - * Called with preemption disabled, and from cross-cpu IRQ context. + * If needed, entrain an rcu_barrier() callback on rdp->cblist.
*/ -static void rcu_barrier_func(void *cpu_in) +static void rcu_barrier_entrain(struct rcu_data *rdp) { - uintptr_t cpu = (uintptr_t)cpu_in; - struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); + unsigned long gseq = READ_ONCE(rcu_state.barrier_sequence); + unsigned long lseq = READ_ONCE(rdp->barrier_seq_snap); + lockdep_assert_held(&rdp->barrier_lock); + if (rcu_seq_state(lseq) || !rcu_seq_state(gseq) || rcu_seq_ctr(lseq) != rcu_seq_ctr(gseq)) + return; rcu_barrier_trace(TPS("IRQ"), -1, rcu_state.barrier_sequence); rdp->barrier_head.func = rcu_barrier_callback; debug_rcu_head_queue(&rdp->barrier_head); @@ -4003,10 +4006,26 @@ static void rcu_barrier_func(void *cpu_in) atomic_inc(&rcu_state.barrier_cpu_count); } else { debug_rcu_head_unqueue(&rdp->barrier_head); - rcu_barrier_trace(TPS("IRQNQ"), -1, - rcu_state.barrier_sequence); + rcu_barrier_trace(TPS("IRQNQ"), -1, rcu_state.barrier_sequence); } rcu_nocb_unlock(rdp); + smp_store_release(&rdp->barrier_seq_snap, gseq); +} + +/* + * Called with preemption disabled, and from cross-cpu IRQ context. + */ +static void rcu_barrier_handler(void *cpu_in) +{ + uintptr_t cpu = (uintptr_t)cpu_in; + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); + + lockdep_assert_irqs_disabled(); + WARN_ON_ONCE(cpu != rdp->cpu); + WARN_ON_ONCE(cpu != smp_processor_id()); + raw_spin_lock(&rdp->barrier_lock); + rcu_barrier_entrain(rdp); + raw_spin_unlock(&rdp->barrier_lock); } /** @@ -4020,6 +4039,8 @@ static void rcu_barrier_func(void *cpu_in) void rcu_barrier(void) { uintptr_t cpu; + unsigned long flags; + unsigned long gseq; struct rcu_data *rdp; unsigned long s = rcu_seq_snap(&rcu_state.barrier_sequence); @@ -4038,6 +4059,7 @@ void rcu_barrier(void) /* Mark the start of the barrier operation. 
*/ rcu_seq_start(&rcu_state.barrier_sequence); + gseq = rcu_state.barrier_sequence; rcu_barrier_trace(TPS("Inc1"), -1, rcu_state.barrier_sequence); /* @@ -4058,19 +4080,30 @@ void rcu_barrier(void) */ for_each_possible_cpu(cpu) { rdp = per_cpu_ptr(&rcu_data, cpu); +retry: + if (smp_load_acquire(&rdp->barrier_seq_snap) == gseq) + continue; + raw_spin_lock_irqsave(&rdp->barrier_lock, flags); if (!rcu_segcblist_n_cbs(&rdp->cblist)) { + WRITE_ONCE(rdp->barrier_seq_snap, gseq); + raw_spin_unlock_irqrestore(&rdp->barrier_lock, flags); rcu_barrier_trace(TPS("NQ"), cpu, rcu_state.barrier_sequence); continue; } - if (cpu_online(cpu)) { - rcu_barrier_trace(TPS("OnlineQ"), cpu, rcu_state.barrier_sequence); - smp_call_function_single(cpu, rcu_barrier_func, (void *)cpu, 1); - } else { + if (!rcu_rdp_cpu_online(rdp)) { + rcu_barrier_entrain(rdp); + WARN_ON_ONCE(READ_ONCE(rdp->barrier_seq_snap) != gseq); + raw_spin_unlock_irqrestore(&rdp->barrier_lock, flags); rcu_barrier_trace(TPS("OfflineNoCBQ"), cpu, rcu_state.barrier_sequence); - local_irq_disable(); - rcu_barrier_func((void *)cpu); - local_irq_enable(); + continue; } + raw_spin_unlock_irqrestore(&rdp->barrier_lock, flags); + if (smp_call_function_single(cpu, rcu_barrier_handler, (void *)cpu, 1)) { + schedule_timeout_uninterruptible(1); + goto retry; + } + WARN_ON_ONCE(READ_ONCE(rdp->barrier_seq_snap) != gseq); + rcu_barrier_trace(TPS("OnlineQ"), cpu, rcu_state.barrier_sequence); } cpus_read_unlock(); @@ -4087,6 +4120,12 @@ void rcu_barrier(void) /* Mark the end of the barrier operation. */ rcu_barrier_trace(TPS("Inc2"), -1, rcu_state.barrier_sequence); rcu_seq_end(&rcu_state.barrier_sequence); + gseq = rcu_state.barrier_sequence; + for_each_possible_cpu(cpu) { + rdp = per_cpu_ptr(&rcu_data, cpu); + + WRITE_ONCE(rdp->barrier_seq_snap, gseq); + } /* Other rcu_barrier() invocations can now safely proceed. 
*/ mutex_unlock(&rcu_state.barrier_mutex); @@ -4134,6 +4173,8 @@ rcu_boot_init_percpu_data(int cpu) INIT_WORK(&rdp->strict_work, strict_work_handler); WARN_ON_ONCE(rdp->dynticks_nesting != 1); WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp))); + raw_spin_lock_init(&rdp->barrier_lock); + rdp->barrier_seq_snap = rcu_state.barrier_sequence; rdp->rcu_ofl_gp_seq = rcu_state.gp_seq; rdp->rcu_ofl_gp_flags = RCU_GP_CLEANED; rdp->rcu_onl_gp_seq = rcu_state.gp_seq; @@ -4284,8 +4325,10 @@ void rcu_cpu_starting(unsigned int cpu) local_irq_save(flags); arch_spin_lock(&rcu_state.ofl_lock); rcu_dynticks_eqs_online(); + raw_spin_lock(&rdp->barrier_lock); raw_spin_lock_rcu_node(rnp); WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask); + raw_spin_unlock(&rdp->barrier_lock); newcpu = !(rnp->expmaskinitnext & mask); rnp->expmaskinitnext |= mask; /* Allow lockless access for expedited grace periods. */ @@ -4372,7 +4415,9 @@ void rcutree_migrate_callbacks(int cpu) rcu_segcblist_empty(&rdp->cblist)) return; /* No callbacks to migrate. */ - local_irq_save(flags); + raw_spin_lock_irqsave(&rdp->barrier_lock, flags); + WARN_ON_ONCE(rcu_rdp_cpu_online(rdp)); + rcu_barrier_entrain(rdp); my_rdp = this_cpu_ptr(&rcu_data); my_rnp = my_rdp->mynode; rcu_nocb_lock(my_rdp); /* irqs already disabled. */ @@ -4382,10 +4427,10 @@ void rcutree_migrate_callbacks(int cpu) needwake = rcu_advance_cbs(my_rnp, rdp) || rcu_advance_cbs(my_rnp, my_rdp); rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist); + raw_spin_unlock(&rdp->barrier_lock); /* irqs remain disabled. */ needwake = needwake || rcu_advance_cbs(my_rnp, my_rdp); rcu_segcblist_disable(&rdp->cblist); - WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) != - !rcu_segcblist_n_cbs(&my_rdp->cblist)); + WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) != !rcu_segcblist_n_cbs(&my_rdp->cblist)); if (rcu_rdp_is_offloaded(my_rdp)) { raw_spin_unlock_rcu_node(my_rnp); /* irqs remain disabled. 
*/ __call_rcu_nocb_wake(my_rdp, true, flags); diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 4b4bcef8a9743..84362951ed9e1 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -188,6 +188,8 @@ struct rcu_data { bool rcu_forced_tick_exp; /* ... provide QS to expedited GP. */ /* 4) rcu_barrier(), OOM callbacks, and expediting. */ + raw_spinlock_t barrier_lock; /* Protects ->barrier_seq_snap. */ + unsigned long barrier_seq_snap; /* Snap of rcu_state.barrier_sequence. */ struct rcu_head barrier_head; int exp_dynticks_snap; /* Double-check need for IPI. */ -- GitLab From 80b3fd474c91b3ecfd845b4a0bfb58706b877ba5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 14 Dec 2021 13:35:17 -0800 Subject: [PATCH 0305/1586] rcu: Make rcu_barrier() no longer block CPU-hotplug operations This commit removes the cpus_read_lock() and cpus_read_unlock() calls from rcu_barrier(), thus allowing CPUs to come and go during the course of rcu_barrier() execution. Posting of the ->barrier_head callbacks does synchronize with portions of RCU's CPU-hotplug notifiers, but these locks are held for short time periods on both sides. Thus, full CPU-hotplug operations could both start and finish during the execution of a given rcu_barrier() invocation. Additional synchronization is provided by a global ->barrier_lock. Since the ->barrier_lock is only used during rcu_barrier() execution and during onlining/offlining a CPU, the contention for this lock should be low. It might be tempting to make use of a per-CPU lock just on general principles, but straightforward attempts to do this have the problems shown below. Initial state: 3 CPUs present, CPU 0 and CPU1 do not have any callback and CPU2 has callbacks. 1. CPU0 calls rcu_barrier(). 2. CPU1 starts offlining for CPU2. CPU1 calls rcutree_migrate_callbacks(). rcu_barrier_entrain() is called from rcutree_migrate_callbacks(), with CPU2's rdp->barrier_lock. 
It does not entrain ->barrier_head for CPU2, as rcu_barrier() on CPU0 hasn't started the barrier sequence (by calling rcu_seq_start(&rcu_state.barrier_sequence)) yet. 3. CPU0 starts a new barrier sequence. It iterates over CPU0 and CPU1 and, after acquiring their per-CPU ->barrier_lock, finds 0 segcblist length. It updates ->barrier_seq_snap for CPU0 and CPU1 and continues loop iteration to CPU2. for_each_possible_cpu(cpu) { raw_spin_lock_irqsave(&rdp->barrier_lock, flags); if (!rcu_segcblist_n_cbs(&rdp->cblist)) { WRITE_ONCE(rdp->barrier_seq_snap, gseq); raw_spin_unlock_irqrestore(&rdp->barrier_lock, flags); rcu_barrier_trace(TPS("NQ"), cpu, rcu_state.barrier_sequence); continue; } 4. rcutree_migrate_callbacks() completes execution on CPU1. Segcblist len for CPU2 becomes 0. 5. The loop iteration on CPU0 checks rcu_segcblist_n_cbs(&rdp->cblist) for CPU2 and completes the loop iteration after setting ->barrier_seq_snap. 6. As there isn't any ->barrier_head callback entrained at this point, rcu_barrier() on CPU0 returns. 7. The callbacks, which migrated from CPU2 to CPU1, execute. Straightforward per-CPU locking is also subject to the following race condition noted by Boqun Feng: 1. CPU0 calls rcu_barrier(), starting a new barrier sequence by invoking rcu_seq_start() and init_completion(), but does not yet initialize rcu_state.barrier_cpu_count. 2. CPU1 starts offlining for CPU2, calling rcutree_migrate_callbacks(), which in turn calls rcu_barrier_entrain() holding CPU2's rdp->barrier_lock. It then entrains ->barrier_head for CPU2 and atomically increments rcu_state.barrier_cpu_count, which is unfortunately not yet initialized to the value 2. 3. The just-entrained RCU callback is invoked. It atomically decrements rcu_state.barrier_cpu_count and sees that it is now zero. This callback therefore invokes complete(). 4. CPU0 continues executing rcu_barrier(), but is not blocked by its call to wait_for_completion().
This results in rcu_barrier() returning before all pre-existing callbacks have been invoked, which is a bug. Therefore, synchronization is provided by rcu_state.barrier_lock, which is also held across the initialization sequence, especially the rcu_seq_start() and the atomic_set() that sets rcu_state.barrier_cpu_count to the value 2. In addition, this lock is held when entraining the rcu_barrier() callback, when deciding whether or not a CPU has callbacks that rcu_barrier() must wait on, when setting the ->qsmaskinitnext for incoming CPUs, and when migrating callbacks from a CPU that is going offline. Reviewed-by: Frederic Weisbecker Co-developed-by: Neeraj Upadhyay Signed-off-by: Neeraj Upadhyay Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 28 ++++++++++++++-------------- kernel/rcu/tree.h | 3 ++- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 004ff1c0d192f..2d70b91e3fbcc 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -87,6 +87,7 @@ static struct rcu_state rcu_state = { .gp_state = RCU_GP_IDLE, .gp_seq = (0UL - 300UL) << RCU_SEQ_CTR_SHIFT, .barrier_mutex = __MUTEX_INITIALIZER(rcu_state.barrier_mutex), + .barrier_lock = __RAW_SPIN_LOCK_UNLOCKED(rcu_state.barrier_lock), .name = RCU_NAME, .abbr = RCU_ABBR, .exp_mutex = __MUTEX_INITIALIZER(rcu_state.exp_mutex), @@ -3994,7 +3995,7 @@ static void rcu_barrier_entrain(struct rcu_data *rdp) unsigned long gseq = READ_ONCE(rcu_state.barrier_sequence); unsigned long lseq = READ_ONCE(rdp->barrier_seq_snap); - lockdep_assert_held(&rdp->barrier_lock); + lockdep_assert_held(&rcu_state.barrier_lock); if (rcu_seq_state(lseq) || !rcu_seq_state(gseq) || rcu_seq_ctr(lseq) != rcu_seq_ctr(gseq)) return; rcu_barrier_trace(TPS("IRQ"), -1, rcu_state.barrier_sequence); @@ -4023,9 +4024,9 @@ static void rcu_barrier_handler(void *cpu_in) lockdep_assert_irqs_disabled(); WARN_ON_ONCE(cpu != rdp->cpu); WARN_ON_ONCE(cpu != smp_processor_id()); - 
raw_spin_lock(&rdp->barrier_lock); + raw_spin_lock(&rcu_state.barrier_lock); rcu_barrier_entrain(rdp); - raw_spin_unlock(&rdp->barrier_lock); + raw_spin_unlock(&rcu_state.barrier_lock); } /** @@ -4058,6 +4059,7 @@ void rcu_barrier(void) } /* Mark the start of the barrier operation. */ + raw_spin_lock_irqsave(&rcu_state.barrier_lock, flags); rcu_seq_start(&rcu_state.barrier_sequence); gseq = rcu_state.barrier_sequence; rcu_barrier_trace(TPS("Inc1"), -1, rcu_state.barrier_sequence); @@ -4071,7 +4073,7 @@ void rcu_barrier(void) */ init_completion(&rcu_state.barrier_completion); atomic_set(&rcu_state.barrier_cpu_count, 2); - cpus_read_lock(); + raw_spin_unlock_irqrestore(&rcu_state.barrier_lock, flags); /* * Force each CPU with callbacks to register a new callback. @@ -4083,21 +4085,21 @@ void rcu_barrier(void) retry: if (smp_load_acquire(&rdp->barrier_seq_snap) == gseq) continue; - raw_spin_lock_irqsave(&rdp->barrier_lock, flags); + raw_spin_lock_irqsave(&rcu_state.barrier_lock, flags); if (!rcu_segcblist_n_cbs(&rdp->cblist)) { WRITE_ONCE(rdp->barrier_seq_snap, gseq); - raw_spin_unlock_irqrestore(&rdp->barrier_lock, flags); + raw_spin_unlock_irqrestore(&rcu_state.barrier_lock, flags); rcu_barrier_trace(TPS("NQ"), cpu, rcu_state.barrier_sequence); continue; } if (!rcu_rdp_cpu_online(rdp)) { rcu_barrier_entrain(rdp); WARN_ON_ONCE(READ_ONCE(rdp->barrier_seq_snap) != gseq); - raw_spin_unlock_irqrestore(&rdp->barrier_lock, flags); + raw_spin_unlock_irqrestore(&rcu_state.barrier_lock, flags); rcu_barrier_trace(TPS("OfflineNoCBQ"), cpu, rcu_state.barrier_sequence); continue; } - raw_spin_unlock_irqrestore(&rdp->barrier_lock, flags); + raw_spin_unlock_irqrestore(&rcu_state.barrier_lock, flags); if (smp_call_function_single(cpu, rcu_barrier_handler, (void *)cpu, 1)) { schedule_timeout_uninterruptible(1); goto retry; @@ -4105,7 +4107,6 @@ retry: WARN_ON_ONCE(READ_ONCE(rdp->barrier_seq_snap) != gseq); rcu_barrier_trace(TPS("OnlineQ"), cpu, rcu_state.barrier_sequence); } - 
cpus_read_unlock(); /* * Now that we have an rcu_barrier_callback() callback on each @@ -4173,7 +4174,6 @@ rcu_boot_init_percpu_data(int cpu) INIT_WORK(&rdp->strict_work, strict_work_handler); WARN_ON_ONCE(rdp->dynticks_nesting != 1); WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp))); - raw_spin_lock_init(&rdp->barrier_lock); rdp->barrier_seq_snap = rcu_state.barrier_sequence; rdp->rcu_ofl_gp_seq = rcu_state.gp_seq; rdp->rcu_ofl_gp_flags = RCU_GP_CLEANED; @@ -4325,10 +4325,10 @@ void rcu_cpu_starting(unsigned int cpu) local_irq_save(flags); arch_spin_lock(&rcu_state.ofl_lock); rcu_dynticks_eqs_online(); - raw_spin_lock(&rdp->barrier_lock); + raw_spin_lock(&rcu_state.barrier_lock); raw_spin_lock_rcu_node(rnp); WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask); - raw_spin_unlock(&rdp->barrier_lock); + raw_spin_unlock(&rcu_state.barrier_lock); newcpu = !(rnp->expmaskinitnext & mask); rnp->expmaskinitnext |= mask; /* Allow lockless access for expedited grace periods. */ @@ -4415,7 +4415,7 @@ void rcutree_migrate_callbacks(int cpu) rcu_segcblist_empty(&rdp->cblist)) return; /* No callbacks to migrate. */ - raw_spin_lock_irqsave(&rdp->barrier_lock, flags); + raw_spin_lock_irqsave(&rcu_state.barrier_lock, flags); WARN_ON_ONCE(rcu_rdp_cpu_online(rdp)); rcu_barrier_entrain(rdp); my_rdp = this_cpu_ptr(&rcu_data); @@ -4427,7 +4427,7 @@ void rcutree_migrate_callbacks(int cpu) needwake = rcu_advance_cbs(my_rnp, rdp) || rcu_advance_cbs(my_rnp, my_rdp); rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist); - raw_spin_unlock(&rdp->barrier_lock); /* irqs remain disabled. */ + raw_spin_unlock(&rcu_state.barrier_lock); /* irqs remain disabled. 
*/ needwake = needwake || rcu_advance_cbs(my_rnp, my_rdp); rcu_segcblist_disable(&rdp->cblist); WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) != !rcu_segcblist_n_cbs(&my_rdp->cblist)); diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 84362951ed9e1..a2d7ffd634cc1 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -188,7 +188,6 @@ struct rcu_data { bool rcu_forced_tick_exp; /* ... provide QS to expedited GP. */ /* 4) rcu_barrier(), OOM callbacks, and expediting. */ - raw_spinlock_t barrier_lock; /* Protects ->barrier_seq_snap. */ unsigned long barrier_seq_snap; /* Snap of rcu_state.barrier_sequence. */ struct rcu_head barrier_head; int exp_dynticks_snap; /* Double-check need for IPI. */ @@ -323,6 +322,8 @@ struct rcu_state { /* rcu_barrier(). */ /* End of fields guarded by barrier_mutex. */ + raw_spinlock_t barrier_lock; /* Protects ->barrier_seq_snap. */ + struct mutex exp_mutex; /* Serialize expedited GP. */ struct mutex exp_wake_mutex; /* Serialize wakeup. */ unsigned long expedited_sequence; /* Take a ticket. */ -- GitLab From 5ae0f1b58b28b53f4ab3708ef9337a2665e79664 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 10 Dec 2021 13:44:17 -0800 Subject: [PATCH 0306/1586] rcu: Create and use an rcu_rdp_cpu_online() The pattern "rdp->grpmask & rcu_rnp_online_cpus(rnp)" occurs frequently in RCU code in order to determine whether rdp->cpu is online from an RCU perspective. This commit therefore creates an rcu_rdp_cpu_online() function to replace it. [ paulmck: Apply kernel test robot unused-variable feedback. ] Signed-off-by: Paul E. 
McKenney --- kernel/rcu/tree.c | 21 +++++++++++++-------- kernel/rcu/tree_plugin.h | 6 ++---- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 2d70b91e3fbcc..1d3507d563db1 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -222,6 +222,16 @@ static unsigned long rcu_rnp_online_cpus(struct rcu_node *rnp) return READ_ONCE(rnp->qsmaskinitnext); } +/* + * Is the CPU corresponding to the specified rcu_data structure online + * from RCU's perspective? This perspective is given by that structure's + * ->qsmaskinitnext field rather than by the global cpu_online_mask. + */ +static bool rcu_rdp_cpu_online(struct rcu_data *rdp) +{ + return !!(rdp->grpmask & rcu_rnp_online_cpus(rdp->mynode)); +} + /* * Return true if an RCU grace period is in progress. The READ_ONCE()s * permit this function to be invoked without holding the root rcu_node @@ -1168,14 +1178,12 @@ void rcu_request_urgent_qs_task(struct task_struct *t) bool rcu_lockdep_current_cpu_online(void) { struct rcu_data *rdp; - struct rcu_node *rnp; bool ret = false; if (in_nmi() || !rcu_scheduler_fully_active) return true; preempt_disable_notrace(); rdp = this_cpu_ptr(&rcu_data); - rnp = rdp->mynode; /* * Strictly, we care here about the case where the current CPU is * in rcu_cpu_starting() and thus has an excuse for rdp->grpmask @@ -1183,8 +1191,7 @@ bool rcu_lockdep_current_cpu_online(void) * false positive if it's held by some *other* CPU, but that's * OK because that just means a false *negative* on the warning. */ - if (rdp->grpmask & rcu_rnp_online_cpus(rnp) || - arch_spin_is_locked(&rcu_state.ofl_lock)) + if (rcu_rdp_cpu_online(rdp) || arch_spin_is_locked(&rcu_state.ofl_lock)) ret = true; preempt_enable_notrace(); return ret; @@ -1269,8 +1276,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) * For more detail, please refer to the "Hotplug CPU" section * of RCU's Requirements documentation. 
*/ - if (WARN_ON_ONCE(!(rdp->grpmask & rcu_rnp_online_cpus(rnp)))) { - bool onl; + if (WARN_ON_ONCE(!rcu_rdp_cpu_online(rdp))) { struct rcu_node *rnp1; pr_info("%s: grp: %d-%d level: %d ->gp_seq %ld ->completedqs %ld\n", @@ -1279,9 +1285,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) for (rnp1 = rnp; rnp1; rnp1 = rnp1->parent) pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx ->rcu_gp_init_mask %#lx\n", __func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext, rnp1->rcu_gp_init_mask); - onl = !!(rdp->grpmask & rcu_rnp_online_cpus(rnp)); pr_info("%s %d: %c online: %ld(%d) offline: %ld(%d)\n", - __func__, rdp->cpu, ".o"[onl], + __func__, rdp->cpu, ".o"[rcu_rdp_cpu_online(rdp)], (long)rdp->rcu_onl_gp_seq, rdp->rcu_onl_gp_flags, (long)rdp->rcu_ofl_gp_seq, rdp->rcu_ofl_gp_flags); return 1; /* Break things loose after complaining. */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index c5b45c2f68a15..d3db2168598ef 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -330,7 +330,7 @@ void rcu_note_context_switch(bool preempt) * then queue the task as required based on the states * of any ongoing and expedited grace periods. 
*/ - WARN_ON_ONCE((rdp->grpmask & rcu_rnp_online_cpus(rnp)) == 0); + WARN_ON_ONCE(!rcu_rdp_cpu_online(rdp)); WARN_ON_ONCE(!list_empty(&t->rcu_node_entry)); trace_rcu_preempt_task(rcu_state.name, t->pid, @@ -773,7 +773,6 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck) int cpu; int i; struct list_head *lhp; - bool onl; struct rcu_data *rdp; struct rcu_node *rnp1; @@ -797,9 +796,8 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck) pr_cont("\n"); for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) { rdp = per_cpu_ptr(&rcu_data, cpu); - onl = !!(rdp->grpmask & rcu_rnp_online_cpus(rnp)); pr_info("\t%d: %c online: %ld(%d) offline: %ld(%d)\n", - cpu, ".o"[onl], + cpu, ".o"[rcu_rdp_cpu_online(rdp)], (long)rdp->rcu_onl_gp_seq, rdp->rcu_onl_gp_flags, (long)rdp->rcu_ofl_gp_seq, rdp->rcu_ofl_gp_flags); } -- GitLab From 2bcd18e041fc3c2ae58f41eb5e18790c7c82c674 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 2 Feb 2022 15:42:36 -0800 Subject: [PATCH 0307/1586] rcu-tasks: Use order_base_2() instead of ilog2() The ilog2() function can be used to generate a shift count, but it will generate the same count for a power of two as for one greater than a power of two. This results in shift counts that are larger than necessary for systems with a power-of-two number of CPUs because the CPUs are numbered from zero, so that the maximum CPU number is one less than that power of two. This commit therefore substitutes order_base_2(), which appears to have been designed for exactly this use case. Suggested-by: Mark Rutland Signed-off-by: Paul E. 
McKenney --- kernel/rcu/tasks.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index d64f0b1d8cd3b..670c75cbcb98d 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -123,7 +123,7 @@ static struct rcu_tasks rt_name = \ .call_func = call, \ .rtpcpu = &rt_name ## __percpu, \ .name = n, \ - .percpu_enqueue_shift = ilog2(CONFIG_NR_CPUS) + 1, \ + .percpu_enqueue_shift = order_base_2(CONFIG_NR_CPUS), \ .percpu_enqueue_lim = 1, \ .percpu_dequeue_lim = 1, \ .barrier_q_mutex = __MUTEX_INITIALIZER(rt_name.barrier_q_mutex), \ @@ -302,7 +302,7 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func, if (unlikely(needadjust)) { raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags); if (rtp->percpu_enqueue_lim != nr_cpu_ids) { - WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids) + 1); + WRITE_ONCE(rtp->percpu_enqueue_shift, order_base_2(nr_cpu_ids)); WRITE_ONCE(rtp->percpu_dequeue_lim, nr_cpu_ids); smp_store_release(&rtp->percpu_enqueue_lim, nr_cpu_ids); pr_info("Switching %s to per-CPU callback queuing.\n", rtp->name); @@ -417,7 +417,7 @@ static int rcu_tasks_need_gpcb(struct rcu_tasks *rtp) if (rcu_task_cb_adjust && ncbs <= rcu_task_collapse_lim) { raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags); if (rtp->percpu_enqueue_lim > 1) { - WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids) + 1); + WRITE_ONCE(rtp->percpu_enqueue_shift, order_base_2(nr_cpu_ids)); smp_store_release(&rtp->percpu_enqueue_lim, 1); rtp->percpu_dequeue_gpseq = get_state_synchronize_rcu(); pr_info("Starting switch %s to CPU-0 callback queuing.\n", rtp->name); -- GitLab From 00a8b4b54cd69d9f7ba1730d3b266469a778b1d7 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Wed, 2 Feb 2022 16:34:40 -0800 Subject: [PATCH 0308/1586] rcu-tasks: Set ->percpu_enqueue_shift to zero upon contention Currently, call_rcu_tasks_generic() sets ->percpu_enqueue_shift to order_base_2(nr_cpu_ids) upon encountering sufficient contention. This does not shift to use of non-CPU-0 callback queues as intended, but rather continues using only CPU 0's queue. Although this does provide some decrease in contention due to spreading work over multiple locks, it is not the dramatic decrease that was intended. This commit therefore makes call_rcu_tasks_generic() set ->percpu_enqueue_shift to 0. Reported-by: Neeraj Upadhyay Signed-off-by: Paul E. McKenney --- kernel/rcu/tasks.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 670c75cbcb98d..ac17348187e48 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -302,7 +302,7 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func, if (unlikely(needadjust)) { raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags); if (rtp->percpu_enqueue_lim != nr_cpu_ids) { - WRITE_ONCE(rtp->percpu_enqueue_shift, order_base_2(nr_cpu_ids)); + WRITE_ONCE(rtp->percpu_enqueue_shift, 0); WRITE_ONCE(rtp->percpu_dequeue_lim, nr_cpu_ids); smp_store_release(&rtp->percpu_enqueue_lim, nr_cpu_ids); pr_info("Switching %s to per-CPU callback queuing.\n", rtp->name); -- GitLab From c6c89783eba05a5e159b07cfd8c68d841cc5de42 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 28 Jan 2022 15:39:36 -0800 Subject: [PATCH 0309/1586] fscrypt: add functions for direct I/O support Encrypted files traditionally haven't supported DIO, due to the need to encrypt/decrypt the data. However, when the encryption is implemented using inline encryption (blk-crypto) instead of the traditional filesystem-layer encryption, it is straightforward to support DIO. 
In preparation for supporting this, add the following functions: - fscrypt_dio_supported() checks whether a DIO request is supported as far as encryption is concerned. Encrypted files will only support DIO when inline encryption is used and the I/O request is properly aligned; this function checks these preconditions. - fscrypt_limit_io_blocks() limits the length of a bio to avoid crossing a place in the file that a bio with an encryption context cannot cross due to a DUN discontiguity. This function is needed by filesystems that use the iomap DIO implementation (which operates directly on logical ranges, so it won't use fscrypt_mergeable_bio()) and that support FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32. Co-developed-by: Satya Tangirala Signed-off-by: Satya Tangirala Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220128233940.79464-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/crypto.c | 8 ++++ fs/crypto/inline_crypt.c | 93 ++++++++++++++++++++++++++++++++++++++++ include/linux/fscrypt.h | 18 ++++++++ 3 files changed, 119 insertions(+) diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 4ef3f714046aa..4fcca79f39aeb 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -69,6 +69,14 @@ void fscrypt_free_bounce_page(struct page *bounce_page) } EXPORT_SYMBOL(fscrypt_free_bounce_page); +/* + * Generate the IV for the given logical block number within the given file. + * For filenames encryption, lblk_num == 0. + * + * Keep this in sync with fscrypt_limit_io_blocks(). fscrypt_limit_io_blocks() + * needs to know about any IV generation methods where the low bits of IV don't + * simply contain the lblk_num (e.g., IV_INO_LBLK_32). 
+ */ void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num, const struct fscrypt_info *ci) { diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c index c57bebfa48fea..93c2ca8580923 100644 --- a/fs/crypto/inline_crypt.c +++ b/fs/crypto/inline_crypt.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "fscrypt_private.h" @@ -315,6 +316,10 @@ EXPORT_SYMBOL_GPL(fscrypt_set_bio_crypt_ctx_bh); * * fscrypt_set_bio_crypt_ctx() must have already been called on the bio. * + * This function isn't required in cases where crypto-mergeability is ensured in + * another way, such as I/O targeting only a single file (and thus a single key) + * combined with fscrypt_limit_io_blocks() to ensure DUN contiguity. + * * Return: true iff the I/O is mergeable */ bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode, @@ -363,3 +368,91 @@ bool fscrypt_mergeable_bio_bh(struct bio *bio, return fscrypt_mergeable_bio(bio, inode, next_lblk); } EXPORT_SYMBOL_GPL(fscrypt_mergeable_bio_bh); + +/** + * fscrypt_dio_supported() - check whether a DIO (direct I/O) request is + * supported as far as encryption is concerned + * @iocb: the file and position the I/O is targeting + * @iter: the I/O data segment(s) + * + * Return: %true if there are no encryption constraints that prevent DIO from + * being supported; %false if DIO is unsupported. (Note that in the + * %true case, the filesystem might have other, non-encryption-related + * constraints that prevent DIO from actually being supported.) + */ +bool fscrypt_dio_supported(struct kiocb *iocb, struct iov_iter *iter) +{ + const struct inode *inode = file_inode(iocb->ki_filp); + const unsigned int blocksize = i_blocksize(inode); + + /* If the file is unencrypted, no veto from us. */ + if (!fscrypt_needs_contents_encryption(inode)) + return true; + + /* We only support DIO with inline crypto, not fs-layer crypto. 
*/ + if (!fscrypt_inode_uses_inline_crypto(inode)) + return false; + + /* + * Since the granularity of encryption is filesystem blocks, the file + * position and total I/O length must be aligned to the filesystem block + * size -- not just to the block device's logical block size as is + * traditionally the case for DIO on many filesystems. + * + * We require that the user-provided memory buffers be filesystem block + * aligned too. It is simpler to have a single alignment value required + * for all properties of the I/O, as is normally the case for DIO. + * Also, allowing less aligned buffers would imply that data units could + * cross bvecs, which would greatly complicate the I/O stack, which + * assumes that bios can be split at any bvec boundary. + */ + if (!IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(iter), blocksize)) + return false; + + return true; +} +EXPORT_SYMBOL_GPL(fscrypt_dio_supported); + +/** + * fscrypt_limit_io_blocks() - limit I/O blocks to avoid discontiguous DUNs + * @inode: the file on which I/O is being done + * @lblk: the block at which the I/O is being started from + * @nr_blocks: the number of blocks we want to submit starting at @lblk + * + * Determine the limit to the number of blocks that can be submitted in a bio + * targeting @lblk without causing a data unit number (DUN) discontiguity. + * + * This is normally just @nr_blocks, as normally the DUNs just increment along + * with the logical blocks. (Or the file is not encrypted.) + * + * In rare cases, fscrypt can be using an IV generation method that allows the + * DUN to wrap around within logically contiguous blocks, and that wraparound + * will occur. If this happens, a value less than @nr_blocks will be returned + * so that the wraparound doesn't occur in the middle of a bio, which would + * cause encryption/decryption to produce wrong results. 
+ * + * Return: the actual number of blocks that can be submitted + */ +u64 fscrypt_limit_io_blocks(const struct inode *inode, u64 lblk, u64 nr_blocks) +{ + const struct fscrypt_info *ci; + u32 dun; + + if (!fscrypt_inode_uses_inline_crypto(inode)) + return nr_blocks; + + if (nr_blocks <= 1) + return nr_blocks; + + ci = inode->i_crypt_info; + if (!(fscrypt_policy_flags(&ci->ci_policy) & + FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32)) + return nr_blocks; + + /* With IV_INO_LBLK_32, the DUN can wrap around from U32_MAX to 0. */ + + dun = ci->ci_hashed_ino + lblk; + + return min_t(u64, nr_blocks, (u64)U32_MAX + 1 - dun); +} +EXPORT_SYMBOL_GPL(fscrypt_limit_io_blocks); diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 91ea9477e9bd2..50d92d805bd8c 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -714,6 +714,10 @@ bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode, bool fscrypt_mergeable_bio_bh(struct bio *bio, const struct buffer_head *next_bh); +bool fscrypt_dio_supported(struct kiocb *iocb, struct iov_iter *iter); + +u64 fscrypt_limit_io_blocks(const struct inode *inode, u64 lblk, u64 nr_blocks); + #else /* CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ static inline bool __fscrypt_inode_uses_inline_crypto(const struct inode *inode) @@ -742,6 +746,20 @@ static inline bool fscrypt_mergeable_bio_bh(struct bio *bio, { return true; } + +static inline bool fscrypt_dio_supported(struct kiocb *iocb, + struct iov_iter *iter) +{ + const struct inode *inode = file_inode(iocb->ki_filp); + + return !fscrypt_needs_contents_encryption(inode); +} + +static inline u64 fscrypt_limit_io_blocks(const struct inode *inode, u64 lblk, + u64 nr_blocks) +{ + return nr_blocks; +} #endif /* !CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ /** -- GitLab From 489734ef94f4f78087e103ef1bd9019968ff8dbd Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 28 Jan 2022 15:39:37 -0800 Subject: [PATCH 0310/1586] iomap: support direct I/O with fscrypt using blk-crypto 
Encrypted files traditionally haven't supported DIO, due to the need to encrypt/decrypt the data. However, when the encryption is implemented using inline encryption (blk-crypto) instead of the traditional filesystem-layer encryption, it is straightforward to support DIO. Add support for this to the iomap DIO implementation by calling fscrypt_set_bio_crypt_ctx() to set encryption contexts on the bios. Don't check for the rare case where a DUN (crypto data unit number) discontiguity creates a boundary that bios must not cross. Instead, filesystems are expected to handle this in ->iomap_begin() by limiting the length of the mapping so that iomap doesn't have to worry about it. Co-developed-by: Satya Tangirala Signed-off-by: Satya Tangirala Acked-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220128233940.79464-3-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/iomap/direct-io.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 03ea367df19a4..20325b3926fa3 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -179,11 +180,14 @@ static void iomap_dio_bio_end_io(struct bio *bio) static void iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio, loff_t pos, unsigned len) { + struct inode *inode = file_inode(dio->iocb->ki_filp); struct page *page = ZERO_PAGE(0); int flags = REQ_SYNC | REQ_IDLE; struct bio *bio; bio = bio_alloc(GFP_KERNEL, 1); + fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits, + GFP_KERNEL); bio_set_dev(bio, iter->iomap.bdev); bio->bi_iter.bi_sector = iomap_sector(&iter->iomap, pos); bio->bi_private = dio; @@ -310,6 +314,8 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter, } bio = bio_alloc(GFP_KERNEL, nr_pages); + fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits, + GFP_KERNEL); bio_set_dev(bio, iomap->bdev); 
bio->bi_iter.bi_sector = iomap_sector(iomap, pos); bio->bi_write_hint = dio->iocb->ki_hint; -- GitLab From 38ea50daa7a447dbcd7031f37a39a1baa163b2ab Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 28 Jan 2022 15:39:38 -0800 Subject: [PATCH 0311/1586] ext4: support direct I/O with fscrypt using blk-crypto Encrypted files traditionally haven't supported DIO, due to the need to encrypt/decrypt the data. However, when the encryption is implemented using inline encryption (blk-crypto) instead of the traditional filesystem-layer encryption, it is straightforward to support DIO. Therefore, make ext4 support DIO on files that are using inline encryption. Since ext4 uses iomap for DIO, and fscrypt support was already added to iomap DIO, this just requires two small changes: - Let DIO proceed when supported, by checking fscrypt_dio_supported() instead of assuming that encrypted files never support DIO. - In ext4_iomap_begin(), use fscrypt_limit_io_blocks() to limit the length of the mapping in the rare case where a DUN discontiguity occurs in the middle of an extent. The iomap DIO implementation requires this, since it assumes that it can submit a bio covering (up to) the whole mapping, without checking fscrypt constraints itself. 
Co-developed-by: Satya Tangirala Signed-off-by: Satya Tangirala Acked-by: Theodore Ts'o Reviewed-by: Jaegeuk Kim Link: https://lore.kernel.org/r/20220128233940.79464-4-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/ext4/file.c | 10 ++++++---- fs/ext4/inode.c | 7 +++++++ 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 8cc11715518ac..8bd66cdc41be2 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -36,9 +36,11 @@ #include "acl.h" #include "truncate.h" -static bool ext4_dio_supported(struct inode *inode) +static bool ext4_dio_supported(struct kiocb *iocb, struct iov_iter *iter) { - if (IS_ENABLED(CONFIG_FS_ENCRYPTION) && IS_ENCRYPTED(inode)) + struct inode *inode = file_inode(iocb->ki_filp); + + if (!fscrypt_dio_supported(iocb, iter)) return false; if (fsverity_active(inode)) return false; @@ -61,7 +63,7 @@ static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) inode_lock_shared(inode); } - if (!ext4_dio_supported(inode)) { + if (!ext4_dio_supported(iocb, to)) { inode_unlock_shared(inode); /* * Fallback to buffered I/O if the operation being performed on @@ -509,7 +511,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) } /* Fallback to buffered I/O if the inode does not support direct I/O. */ - if (!ext4_dio_supported(inode)) { + if (!ext4_dio_supported(iocb, from)) { if (ilock_shared) inode_unlock_shared(inode); else diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 01c9e4f743ba9..4cf55ef54193a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3409,6 +3409,13 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, if (ret < 0) return ret; out: + /* + * When inline encryption is enabled, sometimes I/O to an encrypted file + * has to be broken up to guarantee DUN contiguity. Handle this by + * limiting the length of the mapping returned. 
+ */ + map.m_len = fscrypt_limit_io_blocks(inode, map.m_lblk, map.m_len); + ext4_set_iomap(inode, iomap, &map, offset, length, flags); return 0; -- GitLab From 8a2c77bc2a9a44d1a80dee1f320ea8f7516b98ee Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 28 Jan 2022 15:39:39 -0800 Subject: [PATCH 0312/1586] f2fs: support direct I/O with fscrypt using blk-crypto Encrypted files traditionally haven't supported DIO, due to the need to encrypt/decrypt the data. However, when the encryption is implemented using inline encryption (blk-crypto) instead of the traditional filesystem-layer encryption, it is straightforward to support DIO. Therefore, make f2fs support DIO on files that are using inline encryption. Since f2fs uses iomap for DIO, and fscrypt support was already added to iomap DIO, this just requires two small changes: - Let DIO proceed when supported, by checking fscrypt_dio_supported() instead of assuming that encrypted files never support DIO. - In f2fs_iomap_begin(), use fscrypt_limit_io_blocks() to limit the length of the mapping in the rare case where a DUN discontiguity occurs in the middle of an extent. The iomap DIO implementation requires this, since it assumes that it can submit a bio covering (up to) the whole mapping, without checking fscrypt constraints itself. 
Co-developed-by: Satya Tangirala Signed-off-by: Satya Tangirala Acked-by: Jaegeuk Kim Link: https://lore.kernel.org/r/20220128233940.79464-5-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/f2fs/data.c | 7 +++++++ fs/f2fs/f2fs.h | 6 +++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8c417864c66ae..020d47f97969c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -4044,6 +4044,13 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, iomap->offset = blks_to_bytes(inode, map.m_lblk); + /* + * When inline encryption is enabled, sometimes I/O to an encrypted file + * has to be broken up to guarantee DUN contiguity. Handle this by + * limiting the length of the mapping returned. + */ + map.m_len = fscrypt_limit_io_blocks(inode, map.m_lblk, map.m_len); + if (map.m_flags & (F2FS_MAP_MAPPED | F2FS_MAP_UNWRITTEN)) { iomap->length = blks_to_bytes(inode, map.m_len); if (map.m_flags & F2FS_MAP_MAPPED) { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 68b44015514f5..8130b092e5432 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4371,7 +4371,11 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int rw = iov_iter_rw(iter); - if (f2fs_post_read_required(inode)) + if (!fscrypt_dio_supported(iocb, iter)) + return true; + if (fsverity_active(inode)) + return true; + if (f2fs_compressed_file(inode)) return true; /* disallow direct IO if any of devices has unaligned blksize */ -- GitLab From cdaa1b1941f667814300799ddb74f3079517cd5a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 28 Jan 2022 15:39:40 -0800 Subject: [PATCH 0313/1586] fscrypt: update documentation for direct I/O support Now that direct I/O is supported on encrypted files in some cases, document what these cases are. 
Link: https://lore.kernel.org/r/20220128233940.79464-6-ebiggers@kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Eric Biggers --- Documentation/filesystems/fscrypt.rst | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst index 4d5d50dca65c6..6ccd5efb25b77 100644 --- a/Documentation/filesystems/fscrypt.rst +++ b/Documentation/filesystems/fscrypt.rst @@ -1047,8 +1047,8 @@ astute users may notice some differences in behavior: may be used to overwrite the source files but isn't guaranteed to be effective on all filesystems and storage devices. -- Direct I/O is not supported on encrypted files. Attempts to use - direct I/O on such files will fall back to buffered I/O. +- Direct I/O is supported on encrypted files only under some + circumstances. For details, see `Direct I/O support`_. - The fallocate operations FALLOC_FL_COLLAPSE_RANGE and FALLOC_FL_INSERT_RANGE are not supported on encrypted files and will @@ -1179,6 +1179,27 @@ Inline encryption doesn't affect the ciphertext or other aspects of the on-disk format, so users may freely switch back and forth between using "inlinecrypt" and not using "inlinecrypt". +Direct I/O support +================== + +For direct I/O on an encrypted file to work, the following conditions +must be met (in addition to the conditions for direct I/O on an +unencrypted file): + +* The file must be using inline encryption. Usually this means that + the filesystem must be mounted with ``-o inlinecrypt`` and inline + encryption hardware must be present. However, a software fallback + is also available. For details, see `Inline encryption support`_. + +* The I/O request must be fully aligned to the filesystem block size. + This means that the file position the I/O is targeting, the lengths + of all I/O segments, and the memory addresses of all I/O buffers + must be multiples of this value. 
Note that the filesystem block + size may be greater than the logical block size of the block device. + +If either of the above conditions is not met, then direct I/O on the +encrypted file will fall back to buffered I/O. + Implementation details ====================== -- GitLab From f233673cd32a048f2eed69e56b61174c33fb740b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 27 Jan 2022 09:39:15 -0800 Subject: [PATCH 0314/1586] torture: Make torture.sh help message match reality This commit fixes a couple of typos: s/--doall/--do-all/ and s/--doallmodconfig/--do-allmodconfig/. [ paulmck: Add Fixes: supplied by Paul Menzel. ] Fixes: a115a775a8d5 ("torture: Add "make allmodconfig" to torture.sh") Reported-by: Paul Menzel Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/bin/torture.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index e00e60efb2310..bfe09e2829c8d 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -71,8 +71,8 @@ usage () { echo " --configs-rcutorture \"config-file list w/ repeat factor (3*TINY01)\"" echo " --configs-locktorture \"config-file list w/ repeat factor (10*LOCK01)\"" echo " --configs-scftorture \"config-file list w/ repeat factor (2*CFLIST)\"" - echo " --doall" - echo " --doallmodconfig / --do-no-allmodconfig" + echo " --do-all" + echo " --do-allmodconfig / --do-no-allmodconfig" echo " --do-clocksourcewd / --do-no-clocksourcewd" echo " --do-kasan / --do-no-kasan" echo " --do-kcsan / --do-no-kcsan" -- GitLab From b5597cb36f8bb29b244b2f90030d54bf81bf6fbc Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Mon, 31 Jan 2022 15:03:36 -0800 Subject: [PATCH 0315/1586] rcutorture: Test SRCU size transitions This commit adds kernel boot parameters to the SRCU-N and SRCU-P rcutorture scenarios to cause SRCU-N to test contention-based resizing and SRCU-P to test init_srcu_struct()-time resizing. Note that this also tests never-resizing because the contention-based resizing normally takes some minutes to make the shift. Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot | 1 + tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot | 2 ++ 2 files changed, 3 insertions(+) diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot index 238bfe3bd0ccc..ce0694fd9b929 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot @@ -1 +1,2 @@ rcutorture.torture_type=srcu +rcutorture.fwd_progress=3 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot index ce48c7b826734..2db39f298d182 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot @@ -1,2 +1,4 @@ rcutorture.torture_type=srcud rcupdate.rcu_self_test=1 +rcutorture.fwd_progress=3 +srcutree.big_cpu_lim=5 -- GitLab From 8ea7a53daf3c9d26910ee9a115b2fb6b86cf3c01 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 1 Feb 2022 08:23:46 -0800 Subject: [PATCH 0316/1586] rcutorture: Provide non-power-of-two Tasks RCU scenarios This commit adjusts RUDE01 to 3 CPUs and TRACE01 to 5 CPUs in order to test Tasks RCU's ability to handle non-power-of-two numbers of CPUs. Signed-off-by: Paul E. 
McKenney --- tools/testing/selftests/rcutorture/configs/rcu/RUDE01 | 2 +- tools/testing/selftests/rcutorture/configs/rcu/TRACE01 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 index 3ca112444ce77..7093422050f66 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 +++ b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 @@ -1,5 +1,5 @@ CONFIG_SMP=y -CONFIG_NR_CPUS=4 +CONFIG_NR_CPUS=3 CONFIG_HOTPLUG_CPU=y CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 index 34c8ff5a12f20..e4d74e5fc1d09 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 @@ -1,5 +1,5 @@ CONFIG_SMP=y -CONFIG_NR_CPUS=4 +CONFIG_NR_CPUS=5 CONFIG_HOTPLUG_CPU=y CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n -- GitLab From 7d8e4c98d13b6b54655140d6b6a6a17fbc9b32f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 23 Jan 2022 18:51:57 +0100 Subject: [PATCH 0317/1586] staging: fbtft: Fix error path in fbtft_driver_module_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If registering the platform driver fails, the function must not return without undoing the spi driver registration first. 
Fixes: c296d5f9957c ("staging: fbtft: core support") Link: https://lore.kernel.org/r/20220118181338.207943-1-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Acked-by: Lee Jones Link: https://lore.kernel.org/r/20220123175201.34839-2-u.kleine-koenig@pengutronix.de Signed-off-by: Mark Brown --- drivers/staging/fbtft/fbtft.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/staging/fbtft/fbtft.h b/drivers/staging/fbtft/fbtft.h index 4cdec34e23d2e..55677efc01380 100644 --- a/drivers/staging/fbtft/fbtft.h +++ b/drivers/staging/fbtft/fbtft.h @@ -334,7 +334,10 @@ static int __init fbtft_driver_module_init(void) \ ret = spi_register_driver(&fbtft_driver_spi_driver); \ if (ret < 0) \ return ret; \ - return platform_driver_register(&fbtft_driver_platform_driver); \ + ret = platform_driver_register(&fbtft_driver_platform_driver); \ + if (ret < 0) \ + spi_unregister_driver(&fbtft_driver_spi_driver); \ + return ret; \ } \ \ static void __exit fbtft_driver_module_exit(void) \ -- GitLab From c222ea5d2f3361458672d1f52c78970aa021cb61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 23 Jan 2022 18:51:58 +0100 Subject: [PATCH 0318/1586] staging: fbtft: Deduplicate driver registration macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The two macros FBTFT_REGISTER_DRIVER and FBTFT_REGISTER_SPI_DRIVER contain quite some duplication: Both define an spi driver and an of device table and the differences are quite subtle. So create two new macros and use both twice. 
Link: https://lore.kernel.org/r/20220118181338.207943-2-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Acked-by: Lee Jones Link: https://lore.kernel.org/r/20220123175201.34839-3-u.kleine-koenig@pengutronix.de Signed-off-by: Mark Brown --- drivers/staging/fbtft/fbtft.h | 93 ++++++++++++++--------------------- 1 file changed, 36 insertions(+), 57 deletions(-) diff --git a/drivers/staging/fbtft/fbtft.h b/drivers/staging/fbtft/fbtft.h index 55677efc01380..6a7545b5bcd2d 100644 --- a/drivers/staging/fbtft/fbtft.h +++ b/drivers/staging/fbtft/fbtft.h @@ -272,21 +272,40 @@ void fbtft_write_reg8_bus9(struct fbtft_par *par, int len, ...); void fbtft_write_reg16_bus8(struct fbtft_par *par, int len, ...); void fbtft_write_reg16_bus16(struct fbtft_par *par, int len, ...); +#define FBTFT_DT_TABLE(_compatible) \ +static const struct of_device_id dt_ids[] = { \ + { .compatible = _compatible }, \ + {}, \ +}; \ +MODULE_DEVICE_TABLE(of, dt_ids); + +#define FBTFT_SPI_DRIVER(_name, _compatible, _display, _spi_ids) \ + \ +static int fbtft_driver_probe_spi(struct spi_device *spi) \ +{ \ + return fbtft_probe_common(_display, spi, NULL); \ +} \ + \ +static int fbtft_driver_remove_spi(struct spi_device *spi) \ +{ \ + struct fb_info *info = spi_get_drvdata(spi); \ + \ + fbtft_remove_common(&spi->dev, info); \ + return 0; \ +} \ + \ +static struct spi_driver fbtft_driver_spi_driver = { \ + .driver = { \ + .name = _name, \ + .of_match_table = dt_ids, \ + }, \ + .id_table = _spi_ids, \ + .probe = fbtft_driver_probe_spi, \ + .remove = fbtft_driver_remove_spi, \ +}; + #define FBTFT_REGISTER_DRIVER(_name, _compatible, _display) \ \ -static int fbtft_driver_probe_spi(struct spi_device *spi) \ -{ \ - return fbtft_probe_common(_display, spi, NULL); \ -} \ - \ -static int fbtft_driver_remove_spi(struct spi_device *spi) \ -{ \ - struct fb_info *info = spi_get_drvdata(spi); \ - \ - fbtft_remove_common(&spi->dev, info); \ - return 0; \ -} \ - \ static int fbtft_driver_probe_pdev(struct 
platform_device *pdev) \ { \ return fbtft_probe_common(_display, NULL, pdev); \ @@ -300,22 +319,9 @@ static int fbtft_driver_remove_pdev(struct platform_device *pdev) \ return 0; \ } \ \ -static const struct of_device_id dt_ids[] = { \ - { .compatible = _compatible }, \ - {}, \ -}; \ - \ -MODULE_DEVICE_TABLE(of, dt_ids); \ +FBTFT_DT_TABLE(_compatible) \ \ - \ -static struct spi_driver fbtft_driver_spi_driver = { \ - .driver = { \ - .name = _name, \ - .of_match_table = dt_ids, \ - }, \ - .probe = fbtft_driver_probe_spi, \ - .remove = fbtft_driver_remove_spi, \ -}; \ +FBTFT_SPI_DRIVER(_name, _compatible, _display, NULL) \ \ static struct platform_driver fbtft_driver_platform_driver = { \ .driver = { \ @@ -351,42 +357,15 @@ module_exit(fbtft_driver_module_exit); #define FBTFT_REGISTER_SPI_DRIVER(_name, _comp_vend, _comp_dev, _display) \ \ -static int fbtft_driver_probe_spi(struct spi_device *spi) \ -{ \ - return fbtft_probe_common(_display, spi, NULL); \ -} \ - \ -static int fbtft_driver_remove_spi(struct spi_device *spi) \ -{ \ - struct fb_info *info = spi_get_drvdata(spi); \ - \ - fbtft_remove_common(&spi->dev, info); \ - return 0; \ -} \ - \ -static const struct of_device_id dt_ids[] = { \ - { .compatible = _comp_vend "," _comp_dev }, \ - {}, \ -}; \ - \ -MODULE_DEVICE_TABLE(of, dt_ids); \ +FBTFT_DT_TABLE(_comp_vend "," _comp_dev) \ \ static const struct spi_device_id spi_ids[] = { \ { .name = _comp_dev }, \ {}, \ }; \ - \ MODULE_DEVICE_TABLE(spi, spi_ids); \ \ -static struct spi_driver fbtft_driver_spi_driver = { \ - .driver = { \ - .name = _name, \ - .of_match_table = dt_ids, \ - }, \ - .id_table = spi_ids, \ - .probe = fbtft_driver_probe_spi, \ - .remove = fbtft_driver_remove_spi, \ -}; \ +FBTFT_SPI_DRIVER(_name, _comp_vend "," _comp_dev, _display, spi_ids) \ \ module_spi_driver(fbtft_driver_spi_driver); -- GitLab From 316f569df766df9a49c36c052ec6afaf19cb6933 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 23 Jan 2022 18:51:59 +0100 
Subject: [PATCH 0319/1586] tpm: st33zp24: Make st33zp24_remove() a void function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Up to now st33zp24_remove() returns zero unconditionally. Make it return no value instead which makes it easier to see in the callers that there is no error to handle. Also the return value of i2c and spi remove callbacks is ignored anyway. Link: https://lore.kernel.org/r/20220104231103.227924-1-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Acked-by: Lee Jones Link: https://lore.kernel.org/r/20220123175201.34839-4-u.kleine-koenig@pengutronix.de Signed-off-by: Mark Brown --- drivers/char/tpm/st33zp24/i2c.c | 5 +---- drivers/char/tpm/st33zp24/spi.c | 5 +---- drivers/char/tpm/st33zp24/st33zp24.c | 3 +-- drivers/char/tpm/st33zp24/st33zp24.h | 2 +- 4 files changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/char/tpm/st33zp24/i2c.c b/drivers/char/tpm/st33zp24/i2c.c index 7c617edff4ca2..3170d59d660c0 100644 --- a/drivers/char/tpm/st33zp24/i2c.c +++ b/drivers/char/tpm/st33zp24/i2c.c @@ -267,11 +267,8 @@ static int st33zp24_i2c_probe(struct i2c_client *client, static int st33zp24_i2c_remove(struct i2c_client *client) { struct tpm_chip *chip = i2c_get_clientdata(client); - int ret; - ret = st33zp24_remove(chip); - if (ret) - return ret; + st33zp24_remove(chip); return 0; } diff --git a/drivers/char/tpm/st33zp24/spi.c b/drivers/char/tpm/st33zp24/spi.c index a75dafd394451..ccd9e42b8eab4 100644 --- a/drivers/char/tpm/st33zp24/spi.c +++ b/drivers/char/tpm/st33zp24/spi.c @@ -384,11 +384,8 @@ static int st33zp24_spi_probe(struct spi_device *dev) static int st33zp24_spi_remove(struct spi_device *dev) { struct tpm_chip *chip = spi_get_drvdata(dev); - int ret; - ret = st33zp24_remove(chip); - if (ret) - return ret; + st33zp24_remove(chip); return 0; } diff --git a/drivers/char/tpm/st33zp24/st33zp24.c b/drivers/char/tpm/st33zp24/st33zp24.c index ce9efb73c144b..15b393e92c8ec 100644 --- 
a/drivers/char/tpm/st33zp24/st33zp24.c +++ b/drivers/char/tpm/st33zp24/st33zp24.c @@ -511,10 +511,9 @@ _tpm_clean_answer: } EXPORT_SYMBOL(st33zp24_probe); -int st33zp24_remove(struct tpm_chip *chip) +void st33zp24_remove(struct tpm_chip *chip) { tpm_chip_unregister(chip); - return 0; } EXPORT_SYMBOL(st33zp24_remove); diff --git a/drivers/char/tpm/st33zp24/st33zp24.h b/drivers/char/tpm/st33zp24/st33zp24.h index 6747be1e25021..b387a476c555f 100644 --- a/drivers/char/tpm/st33zp24/st33zp24.h +++ b/drivers/char/tpm/st33zp24/st33zp24.h @@ -34,5 +34,5 @@ int st33zp24_pm_resume(struct device *dev); int st33zp24_probe(void *phy_id, const struct st33zp24_phy_ops *ops, struct device *dev, int irq, int io_lpcpd); -int st33zp24_remove(struct tpm_chip *chip); +void st33zp24_remove(struct tpm_chip *chip); #endif /* __LOCAL_ST33ZP24_H__ */ -- GitLab From afb0a80e63d67e957b5d0eb4ade301aff6e13c8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 23 Jan 2022 18:52:00 +0100 Subject: [PATCH 0320/1586] platform/chrome: cros_ec: Make cros_ec_unregister() return void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Up to now cros_ec_unregister() returns zero unconditionally. Make it return void instead which makes it easier to see in the callers that there is no error to handle. Also the return value of i2c, platform and spi remove callbacks is ignored anyway. 
Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20211020071753.wltjslmimb6wtlp5@pengutronix.de Signed-off-by: Uwe Kleine-König Acked-by: Lee Jones Link: https://lore.kernel.org/r/20220123175201.34839-5-u.kleine-koenig@pengutronix.de Signed-off-by: Mark Brown --- drivers/platform/chrome/cros_ec.c | 4 +--- drivers/platform/chrome/cros_ec.h | 2 +- drivers/platform/chrome/cros_ec_i2c.c | 4 +++- drivers/platform/chrome/cros_ec_lpc.c | 4 +++- drivers/platform/chrome/cros_ec_spi.c | 4 +++- 5 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/platform/chrome/cros_ec.c b/drivers/platform/chrome/cros_ec.c index fc5aa1525d13c..d49a4efe46c8a 100644 --- a/drivers/platform/chrome/cros_ec.c +++ b/drivers/platform/chrome/cros_ec.c @@ -302,13 +302,11 @@ EXPORT_SYMBOL(cros_ec_register); * * Return: 0 on success or negative error code. */ -int cros_ec_unregister(struct cros_ec_device *ec_dev) +void cros_ec_unregister(struct cros_ec_device *ec_dev) { if (ec_dev->pd) platform_device_unregister(ec_dev->pd); platform_device_unregister(ec_dev->ec); - - return 0; } EXPORT_SYMBOL(cros_ec_unregister); diff --git a/drivers/platform/chrome/cros_ec.h b/drivers/platform/chrome/cros_ec.h index 78363dcfdf23a..bbca0096868ac 100644 --- a/drivers/platform/chrome/cros_ec.h +++ b/drivers/platform/chrome/cros_ec.h @@ -11,7 +11,7 @@ #include int cros_ec_register(struct cros_ec_device *ec_dev); -int cros_ec_unregister(struct cros_ec_device *ec_dev); +void cros_ec_unregister(struct cros_ec_device *ec_dev); int cros_ec_suspend(struct cros_ec_device *ec_dev); int cros_ec_resume(struct cros_ec_device *ec_dev); diff --git a/drivers/platform/chrome/cros_ec_i2c.c b/drivers/platform/chrome/cros_ec_i2c.c index 30c8938c27d54..22feb0fd4ce71 100644 --- a/drivers/platform/chrome/cros_ec_i2c.c +++ b/drivers/platform/chrome/cros_ec_i2c.c @@ -313,7 +313,9 @@ static int cros_ec_i2c_remove(struct i2c_client *client) { struct cros_ec_device *ec_dev = i2c_get_clientdata(client); - return 
cros_ec_unregister(ec_dev); + cros_ec_unregister(ec_dev); + + return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c index d6306d2a096f1..7651417b4a25b 100644 --- a/drivers/platform/chrome/cros_ec_lpc.c +++ b/drivers/platform/chrome/cros_ec_lpc.c @@ -439,7 +439,9 @@ static int cros_ec_lpc_remove(struct platform_device *pdev) acpi_remove_notify_handler(adev->handle, ACPI_ALL_NOTIFY, cros_ec_lpc_acpi_notify); - return cros_ec_unregister(ec_dev); + cros_ec_unregister(ec_dev); + + return 0; } static const struct acpi_device_id cros_ec_lpc_acpi_device_ids[] = { diff --git a/drivers/platform/chrome/cros_ec_spi.c b/drivers/platform/chrome/cros_ec_spi.c index 14c4046fa04d5..713c58687721b 100644 --- a/drivers/platform/chrome/cros_ec_spi.c +++ b/drivers/platform/chrome/cros_ec_spi.c @@ -790,7 +790,9 @@ static int cros_ec_spi_remove(struct spi_device *spi) { struct cros_ec_device *ec_dev = spi_get_drvdata(spi); - return cros_ec_unregister(ec_dev); + cros_ec_unregister(ec_dev); + + return 0; } #ifdef CONFIG_PM_SLEEP -- GitLab From a0386bba70934d42f586eaf68b21d5eeaffa7bd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 23 Jan 2022 18:52:01 +0100 Subject: [PATCH 0321/1586] spi: make remove callback a void function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The value returned by an spi driver's remove function is mostly ignored. (Only an error message is printed if the value is non-zero that the error is ignored.) So change the prototype of the remove function to return no value. This way driver authors are not tempted to assume that passing an error to the upper layer is a good idea. All drivers are adapted accordingly. There is no intended change of behaviour, all callbacks were prepared to return 0 before. 
Signed-off-by: Uwe Kleine-König Acked-by: Marc Kleine-Budde Acked-by: Andy Shevchenko Reviewed-by: Geert Uytterhoeven Acked-by: Jérôme Pouiller Acked-by: Miquel Raynal Acked-by: Jonathan Cameron Acked-by: Claudius Heine Acked-by: Stefan Schmidt Acked-by: Alexandre Belloni Acked-by: Ulf Hansson # For MMC Acked-by: Marcus Folkesson Acked-by: Łukasz Stelmach Acked-by: Lee Jones Link: https://lore.kernel.org/r/20220123175201.34839-6-u.kleine-koenig@pengutronix.de Signed-off-by: Mark Brown --- drivers/bus/moxtet.c | 4 +--- drivers/char/tpm/st33zp24/spi.c | 4 +--- drivers/char/tpm/tpm_tis_spi_main.c | 3 +-- drivers/clk/clk-lmk04832.c | 4 +--- drivers/gpio/gpio-74x164.c | 4 +--- drivers/gpio/gpio-max3191x.c | 4 +--- drivers/gpio/gpio-max7301.c | 4 +--- drivers/gpio/gpio-mc33880.c | 4 +--- drivers/gpio/gpio-pisosr.c | 4 +--- drivers/gpu/drm/panel/panel-abt-y030xx067a.c | 4 +--- drivers/gpu/drm/panel/panel-ilitek-ili9322.c | 4 +--- drivers/gpu/drm/panel/panel-ilitek-ili9341.c | 3 +-- drivers/gpu/drm/panel/panel-innolux-ej030na.c | 4 +--- drivers/gpu/drm/panel/panel-lg-lb035q02.c | 4 +--- drivers/gpu/drm/panel/panel-lg-lg4573.c | 4 +--- drivers/gpu/drm/panel/panel-nec-nl8048hl11.c | 4 +--- drivers/gpu/drm/panel/panel-novatek-nt39016.c | 4 +--- drivers/gpu/drm/panel/panel-samsung-db7430.c | 3 +-- drivers/gpu/drm/panel/panel-samsung-ld9040.c | 4 +--- drivers/gpu/drm/panel/panel-samsung-s6d27a1.c | 3 +-- drivers/gpu/drm/panel/panel-samsung-s6e63m0-spi.c | 3 +-- drivers/gpu/drm/panel/panel-sitronix-st7789v.c | 4 +--- drivers/gpu/drm/panel/panel-sony-acx565akm.c | 4 +--- drivers/gpu/drm/panel/panel-tpo-td028ttec1.c | 4 +--- drivers/gpu/drm/panel/panel-tpo-td043mtea1.c | 4 +--- drivers/gpu/drm/panel/panel-tpo-tpg110.c | 3 +-- drivers/gpu/drm/panel/panel-widechips-ws2401.c | 3 +-- drivers/gpu/drm/tiny/hx8357d.c | 4 +--- drivers/gpu/drm/tiny/ili9163.c | 4 +--- drivers/gpu/drm/tiny/ili9225.c | 4 +--- drivers/gpu/drm/tiny/ili9341.c | 4 +--- drivers/gpu/drm/tiny/ili9486.c | 4 +--- 
drivers/gpu/drm/tiny/mi0283qt.c | 4 +--- drivers/gpu/drm/tiny/repaper.c | 4 +--- drivers/gpu/drm/tiny/st7586.c | 4 +--- drivers/gpu/drm/tiny/st7735r.c | 4 +--- drivers/hwmon/adcxx.c | 4 +--- drivers/hwmon/adt7310.c | 3 +-- drivers/hwmon/max1111.c | 3 +-- drivers/hwmon/max31722.c | 4 +--- drivers/iio/accel/bma400_spi.c | 4 +--- drivers/iio/accel/bmc150-accel-spi.c | 4 +--- drivers/iio/accel/bmi088-accel-spi.c | 4 +--- drivers/iio/accel/kxsd9-spi.c | 4 +--- drivers/iio/accel/mma7455_spi.c | 4 +--- drivers/iio/accel/sca3000.c | 4 +--- drivers/iio/adc/ad7266.c | 4 +--- drivers/iio/adc/ltc2496.c | 4 +--- drivers/iio/adc/mcp320x.c | 4 +--- drivers/iio/adc/mcp3911.c | 4 +--- drivers/iio/adc/ti-adc12138.c | 4 +--- drivers/iio/adc/ti-ads7950.c | 4 +--- drivers/iio/adc/ti-ads8688.c | 4 +--- drivers/iio/adc/ti-tlc4541.c | 4 +--- drivers/iio/amplifiers/ad8366.c | 4 +--- drivers/iio/common/ssp_sensors/ssp_dev.c | 4 +--- drivers/iio/dac/ad5360.c | 4 +--- drivers/iio/dac/ad5380.c | 4 +--- drivers/iio/dac/ad5446.c | 4 +--- drivers/iio/dac/ad5449.c | 4 +--- drivers/iio/dac/ad5504.c | 4 +--- drivers/iio/dac/ad5592r.c | 4 +--- drivers/iio/dac/ad5624r_spi.c | 4 +--- drivers/iio/dac/ad5686-spi.c | 4 +--- drivers/iio/dac/ad5761.c | 4 +--- drivers/iio/dac/ad5764.c | 4 +--- drivers/iio/dac/ad5791.c | 4 +--- drivers/iio/dac/ad8801.c | 4 +--- drivers/iio/dac/ltc1660.c | 4 +--- drivers/iio/dac/ltc2632.c | 4 +--- drivers/iio/dac/mcp4922.c | 4 +--- drivers/iio/dac/ti-dac082s085.c | 4 +--- drivers/iio/dac/ti-dac7311.c | 3 +-- drivers/iio/frequency/adf4350.c | 4 +--- drivers/iio/gyro/bmg160_spi.c | 4 +--- drivers/iio/gyro/fxas21002c_spi.c | 4 +--- drivers/iio/health/afe4403.c | 4 +--- drivers/iio/magnetometer/bmc150_magn_spi.c | 4 +--- drivers/iio/magnetometer/hmc5843_spi.c | 4 +--- drivers/iio/potentiometer/max5487.c | 4 +--- drivers/iio/pressure/ms5611_spi.c | 4 +--- drivers/iio/pressure/zpa2326_spi.c | 4 +--- drivers/input/keyboard/applespi.c | 4 +--- drivers/input/misc/adxl34x-spi.c | 4 +--- 
drivers/input/touchscreen/ads7846.c | 4 +--- drivers/input/touchscreen/cyttsp4_spi.c | 4 +--- drivers/input/touchscreen/tsc2005.c | 4 +--- drivers/leds/leds-cr0014114.c | 4 +--- drivers/leds/leds-dac124s085.c | 4 +--- drivers/leds/leds-el15203000.c | 4 +--- drivers/leds/leds-spi-byte.c | 4 +--- drivers/media/spi/cxd2880-spi.c | 4 +--- drivers/media/spi/gs1662.c | 4 +--- drivers/media/tuners/msi001.c | 3 +-- drivers/mfd/arizona-spi.c | 4 +--- drivers/mfd/da9052-spi.c | 3 +-- drivers/mfd/ezx-pcap.c | 4 +--- drivers/mfd/madera-spi.c | 4 +--- drivers/mfd/mc13xxx-spi.c | 3 +-- drivers/mfd/rsmu_spi.c | 4 +--- drivers/mfd/stmpe-spi.c | 4 +--- drivers/mfd/tps65912-spi.c | 4 +--- drivers/misc/ad525x_dpot-spi.c | 3 +-- drivers/misc/eeprom/eeprom_93xx46.c | 4 +--- drivers/misc/lattice-ecp3-config.c | 4 +--- drivers/misc/lis3lv02d/lis3lv02d_spi.c | 4 +--- drivers/mmc/host/mmc_spi.c | 3 +-- drivers/mtd/devices/mchp23k256.c | 4 +--- drivers/mtd/devices/mchp48l640.c | 4 +--- drivers/mtd/devices/mtd_dataflash.c | 4 +--- drivers/mtd/devices/sst25l.c | 4 +--- drivers/net/can/m_can/tcan4x5x-core.c | 4 +--- drivers/net/can/spi/hi311x.c | 4 +--- drivers/net/can/spi/mcp251x.c | 4 +--- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 4 +--- drivers/net/dsa/b53/b53_spi.c | 4 +--- drivers/net/dsa/microchip/ksz8795_spi.c | 4 +--- drivers/net/dsa/microchip/ksz9477_spi.c | 4 +--- drivers/net/dsa/sja1105/sja1105_main.c | 6 ++---- drivers/net/dsa/vitesse-vsc73xx-spi.c | 6 ++---- drivers/net/ethernet/asix/ax88796c_main.c | 4 +--- drivers/net/ethernet/micrel/ks8851_spi.c | 4 +--- drivers/net/ethernet/microchip/enc28j60.c | 4 +--- drivers/net/ethernet/microchip/encx24j600.c | 4 +--- drivers/net/ethernet/qualcomm/qca_spi.c | 4 +--- drivers/net/ethernet/vertexcom/mse102x.c | 4 +--- drivers/net/ethernet/wiznet/w5100-spi.c | 4 +--- drivers/net/ieee802154/adf7242.c | 4 +--- drivers/net/ieee802154/at86rf230.c | 4 +--- drivers/net/ieee802154/ca8210.c | 6 ++---- drivers/net/ieee802154/cc2520.c | 4 +--- 
drivers/net/ieee802154/mcr20a.c | 4 +--- drivers/net/ieee802154/mrf24j40.c | 4 +--- drivers/net/phy/spi_ks8995.c | 4 +--- drivers/net/wan/slic_ds26522.c | 3 +-- drivers/net/wireless/intersil/p54/p54spi.c | 4 +--- drivers/net/wireless/marvell/libertas/if_spi.c | 4 +--- drivers/net/wireless/microchip/wilc1000/spi.c | 4 +--- drivers/net/wireless/st/cw1200/cw1200_spi.c | 4 +--- drivers/net/wireless/ti/wl1251/spi.c | 4 +--- drivers/net/wireless/ti/wlcore/spi.c | 4 +--- drivers/nfc/nfcmrvl/spi.c | 3 +-- drivers/nfc/st-nci/spi.c | 4 +--- drivers/nfc/st95hf/core.c | 4 +--- drivers/nfc/trf7970a.c | 4 +--- drivers/platform/chrome/cros_ec_spi.c | 4 +--- drivers/platform/olpc/olpc-xo175-ec.c | 4 +--- drivers/rtc/rtc-ds1302.c | 3 +-- drivers/rtc/rtc-ds1305.c | 4 +--- drivers/rtc/rtc-ds1343.c | 4 +--- drivers/spi/spi-mem.c | 6 ++---- drivers/spi/spi-slave-system-control.c | 3 +-- drivers/spi/spi-slave-time.c | 3 +-- drivers/spi/spi-tle62x0.c | 3 +-- drivers/spi/spi.c | 11 ++--------- drivers/spi/spidev.c | 4 +--- drivers/staging/fbtft/fbtft.h | 3 +-- drivers/staging/pi433/pi433_if.c | 4 +--- drivers/staging/wfx/bus_spi.c | 3 +-- drivers/tty/serial/max3100.c | 5 ++--- drivers/tty/serial/max310x.c | 3 +-- drivers/tty/serial/sc16is7xx.c | 4 +--- drivers/usb/gadget/udc/max3420_udc.c | 4 +--- drivers/usb/host/max3421-hcd.c | 3 +-- drivers/video/backlight/ams369fg06.c | 3 +-- drivers/video/backlight/corgi_lcd.c | 3 +-- drivers/video/backlight/ili922x.c | 3 +-- drivers/video/backlight/l4f00242t03.c | 3 +-- drivers/video/backlight/lms501kf03.c | 3 +-- drivers/video/backlight/ltv350qv.c | 3 +-- drivers/video/backlight/tdo24m.c | 3 +-- drivers/video/backlight/tosa_lcd.c | 4 +--- drivers/video/backlight/vgg2432a4.c | 4 +--- drivers/video/fbdev/omap/lcd_mipid.c | 4 +--- .../omap2/omapfb/displays/panel-lgphilips-lb035q02.c | 4 +--- .../omap2/omapfb/displays/panel-nec-nl8048hl11.c | 4 +--- .../omap2/omapfb/displays/panel-sony-acx565akm.c | 4 +--- 
.../omap2/omapfb/displays/panel-tpo-td028ttec1.c | 4 +--- .../omap2/omapfb/displays/panel-tpo-td043mtea1.c | 4 +--- include/linux/spi/spi.h | 2 +- sound/pci/hda/cs35l41_hda_spi.c | 4 +--- sound/soc/codecs/adau1761-spi.c | 3 +-- sound/soc/codecs/adau1781-spi.c | 3 +-- sound/soc/codecs/cs35l41-spi.c | 4 +--- sound/soc/codecs/pcm3168a-spi.c | 4 +--- sound/soc/codecs/pcm512x-spi.c | 3 +-- sound/soc/codecs/tlv320aic32x4-spi.c | 4 +--- sound/soc/codecs/tlv320aic3x-spi.c | 4 +--- sound/soc/codecs/wm0010.c | 4 +--- sound/soc/codecs/wm8804-spi.c | 3 +-- sound/spi/at73c213.c | 4 +--- 191 files changed, 197 insertions(+), 545 deletions(-) diff --git a/drivers/bus/moxtet.c b/drivers/bus/moxtet.c index fd87a59837fa2..5eb0fe73ddc45 100644 --- a/drivers/bus/moxtet.c +++ b/drivers/bus/moxtet.c @@ -815,7 +815,7 @@ static int moxtet_probe(struct spi_device *spi) return 0; } -static int moxtet_remove(struct spi_device *spi) +static void moxtet_remove(struct spi_device *spi) { struct moxtet *moxtet = spi_get_drvdata(spi); @@ -828,8 +828,6 @@ static int moxtet_remove(struct spi_device *spi) device_for_each_child(moxtet->dev, NULL, __unregister); mutex_destroy(&moxtet->lock); - - return 0; } static const struct of_device_id moxtet_dt_ids[] = { diff --git a/drivers/char/tpm/st33zp24/spi.c b/drivers/char/tpm/st33zp24/spi.c index ccd9e42b8eab4..22d184884694a 100644 --- a/drivers/char/tpm/st33zp24/spi.c +++ b/drivers/char/tpm/st33zp24/spi.c @@ -381,13 +381,11 @@ static int st33zp24_spi_probe(struct spi_device *dev) * @param: client, the spi_device description (TPM SPI description). * @return: 0 in case of success. 
*/ -static int st33zp24_spi_remove(struct spi_device *dev) +static void st33zp24_spi_remove(struct spi_device *dev) { struct tpm_chip *chip = spi_get_drvdata(dev); st33zp24_remove(chip); - - return 0; } static const struct spi_device_id st33zp24_spi_id[] = { diff --git a/drivers/char/tpm/tpm_tis_spi_main.c b/drivers/char/tpm/tpm_tis_spi_main.c index aaa59a00eeaef..184396b3af501 100644 --- a/drivers/char/tpm/tpm_tis_spi_main.c +++ b/drivers/char/tpm/tpm_tis_spi_main.c @@ -254,13 +254,12 @@ static int tpm_tis_spi_driver_probe(struct spi_device *spi) static SIMPLE_DEV_PM_OPS(tpm_tis_pm, tpm_pm_suspend, tpm_tis_spi_resume); -static int tpm_tis_spi_remove(struct spi_device *dev) +static void tpm_tis_spi_remove(struct spi_device *dev) { struct tpm_chip *chip = spi_get_drvdata(dev); tpm_chip_unregister(chip); tpm_tis_remove(chip); - return 0; } static const struct spi_device_id tpm_tis_spi_id[] = { diff --git a/drivers/clk/clk-lmk04832.c b/drivers/clk/clk-lmk04832.c index 8f02c0b880005..f416f8bc28987 100644 --- a/drivers/clk/clk-lmk04832.c +++ b/drivers/clk/clk-lmk04832.c @@ -1544,14 +1544,12 @@ err_disable_oscin: return ret; } -static int lmk04832_remove(struct spi_device *spi) +static void lmk04832_remove(struct spi_device *spi) { struct lmk04832 *lmk = spi_get_drvdata(spi); clk_disable_unprepare(lmk->oscin); of_clk_del_provider(spi->dev.of_node); - - return 0; } static const struct spi_device_id lmk04832_id[] = { { "lmk04832", LMK04832 }, diff --git a/drivers/gpio/gpio-74x164.c b/drivers/gpio/gpio-74x164.c index 4a55cdf089d62..e00c333105170 100644 --- a/drivers/gpio/gpio-74x164.c +++ b/drivers/gpio/gpio-74x164.c @@ -163,15 +163,13 @@ exit_destroy: return ret; } -static int gen_74x164_remove(struct spi_device *spi) +static void gen_74x164_remove(struct spi_device *spi) { struct gen_74x164_chip *chip = spi_get_drvdata(spi); gpiod_set_value_cansleep(chip->gpiod_oe, 0); gpiochip_remove(&chip->gpio_chip); mutex_destroy(&chip->lock); - - return 0; } static const struct 
spi_device_id gen_74x164_spi_ids[] = { diff --git a/drivers/gpio/gpio-max3191x.c b/drivers/gpio/gpio-max3191x.c index 51cd6f98d1c74..161c4751c5f7a 100644 --- a/drivers/gpio/gpio-max3191x.c +++ b/drivers/gpio/gpio-max3191x.c @@ -443,14 +443,12 @@ static int max3191x_probe(struct spi_device *spi) return 0; } -static int max3191x_remove(struct spi_device *spi) +static void max3191x_remove(struct spi_device *spi) { struct max3191x_chip *max3191x = spi_get_drvdata(spi); gpiochip_remove(&max3191x->gpio); mutex_destroy(&max3191x->lock); - - return 0; } static int __init max3191x_register_driver(struct spi_driver *sdrv) diff --git a/drivers/gpio/gpio-max7301.c b/drivers/gpio/gpio-max7301.c index 5862d73bf3254..11813f41d4609 100644 --- a/drivers/gpio/gpio-max7301.c +++ b/drivers/gpio/gpio-max7301.c @@ -64,11 +64,9 @@ static int max7301_probe(struct spi_device *spi) return ret; } -static int max7301_remove(struct spi_device *spi) +static void max7301_remove(struct spi_device *spi) { __max730x_remove(&spi->dev); - - return 0; } static const struct spi_device_id max7301_id[] = { diff --git a/drivers/gpio/gpio-mc33880.c b/drivers/gpio/gpio-mc33880.c index 31d2be1bebc84..cd9b16dbe1a97 100644 --- a/drivers/gpio/gpio-mc33880.c +++ b/drivers/gpio/gpio-mc33880.c @@ -134,7 +134,7 @@ exit_destroy: return ret; } -static int mc33880_remove(struct spi_device *spi) +static void mc33880_remove(struct spi_device *spi) { struct mc33880 *mc; @@ -142,8 +142,6 @@ static int mc33880_remove(struct spi_device *spi) gpiochip_remove(&mc->chip); mutex_destroy(&mc->lock); - - return 0; } static struct spi_driver mc33880_driver = { diff --git a/drivers/gpio/gpio-pisosr.c b/drivers/gpio/gpio-pisosr.c index 8e04054cf07e3..81a47ae09ff8c 100644 --- a/drivers/gpio/gpio-pisosr.c +++ b/drivers/gpio/gpio-pisosr.c @@ -163,15 +163,13 @@ static int pisosr_gpio_probe(struct spi_device *spi) return 0; } -static int pisosr_gpio_remove(struct spi_device *spi) +static void pisosr_gpio_remove(struct spi_device *spi) { 
struct pisosr_gpio *gpio = spi_get_drvdata(spi); gpiochip_remove(&gpio->chip); mutex_destroy(&gpio->lock); - - return 0; } static const struct spi_device_id pisosr_gpio_id_table[] = { diff --git a/drivers/gpu/drm/panel/panel-abt-y030xx067a.c b/drivers/gpu/drm/panel/panel-abt-y030xx067a.c index f043b484055b2..ed626fdc08e88 100644 --- a/drivers/gpu/drm/panel/panel-abt-y030xx067a.c +++ b/drivers/gpu/drm/panel/panel-abt-y030xx067a.c @@ -293,15 +293,13 @@ static int y030xx067a_probe(struct spi_device *spi) return 0; } -static int y030xx067a_remove(struct spi_device *spi) +static void y030xx067a_remove(struct spi_device *spi) { struct y030xx067a *priv = spi_get_drvdata(spi); drm_panel_remove(&priv->panel); drm_panel_disable(&priv->panel); drm_panel_unprepare(&priv->panel); - - return 0; } static const struct drm_display_mode y030xx067a_modes[] = { diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9322.c b/drivers/gpu/drm/panel/panel-ilitek-ili9322.c index 8e84df9a0033d..3dfafa585127f 100644 --- a/drivers/gpu/drm/panel/panel-ilitek-ili9322.c +++ b/drivers/gpu/drm/panel/panel-ilitek-ili9322.c @@ -896,14 +896,12 @@ static int ili9322_probe(struct spi_device *spi) return 0; } -static int ili9322_remove(struct spi_device *spi) +static void ili9322_remove(struct spi_device *spi) { struct ili9322 *ili = spi_get_drvdata(spi); ili9322_power_off(ili); drm_panel_remove(&ili->panel); - - return 0; } /* diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9341.c b/drivers/gpu/drm/panel/panel-ilitek-ili9341.c index 2c3378a259b1e..a07ef26234e57 100644 --- a/drivers/gpu/drm/panel/panel-ilitek-ili9341.c +++ b/drivers/gpu/drm/panel/panel-ilitek-ili9341.c @@ -728,7 +728,7 @@ static int ili9341_probe(struct spi_device *spi) return -1; } -static int ili9341_remove(struct spi_device *spi) +static void ili9341_remove(struct spi_device *spi) { const struct spi_device_id *id = spi_get_device_id(spi); struct ili9341 *ili = spi_get_drvdata(spi); @@ -741,7 +741,6 @@ static int ili9341_remove(struct 
spi_device *spi) drm_dev_unplug(drm); drm_atomic_helper_shutdown(drm); } - return 0; } static void ili9341_shutdown(struct spi_device *spi) diff --git a/drivers/gpu/drm/panel/panel-innolux-ej030na.c b/drivers/gpu/drm/panel/panel-innolux-ej030na.c index c558de3f99be7..e3b1daa0cb720 100644 --- a/drivers/gpu/drm/panel/panel-innolux-ej030na.c +++ b/drivers/gpu/drm/panel/panel-innolux-ej030na.c @@ -219,15 +219,13 @@ static int ej030na_probe(struct spi_device *spi) return 0; } -static int ej030na_remove(struct spi_device *spi) +static void ej030na_remove(struct spi_device *spi) { struct ej030na *priv = spi_get_drvdata(spi); drm_panel_remove(&priv->panel); drm_panel_disable(&priv->panel); drm_panel_unprepare(&priv->panel); - - return 0; } static const struct drm_display_mode ej030na_modes[] = { diff --git a/drivers/gpu/drm/panel/panel-lg-lb035q02.c b/drivers/gpu/drm/panel/panel-lg-lb035q02.c index f3183b68704f6..9d0d4faa3f58a 100644 --- a/drivers/gpu/drm/panel/panel-lg-lb035q02.c +++ b/drivers/gpu/drm/panel/panel-lg-lb035q02.c @@ -203,14 +203,12 @@ static int lb035q02_probe(struct spi_device *spi) return 0; } -static int lb035q02_remove(struct spi_device *spi) +static void lb035q02_remove(struct spi_device *spi) { struct lb035q02_device *lcd = spi_get_drvdata(spi); drm_panel_remove(&lcd->panel); drm_panel_disable(&lcd->panel); - - return 0; } static const struct of_device_id lb035q02_of_match[] = { diff --git a/drivers/gpu/drm/panel/panel-lg-lg4573.c b/drivers/gpu/drm/panel/panel-lg-lg4573.c index 8e5160af1de5d..cf246d15b7b62 100644 --- a/drivers/gpu/drm/panel/panel-lg-lg4573.c +++ b/drivers/gpu/drm/panel/panel-lg-lg4573.c @@ -266,14 +266,12 @@ static int lg4573_probe(struct spi_device *spi) return 0; } -static int lg4573_remove(struct spi_device *spi) +static void lg4573_remove(struct spi_device *spi) { struct lg4573 *ctx = spi_get_drvdata(spi); lg4573_display_off(ctx); drm_panel_remove(&ctx->panel); - - return 0; } static const struct of_device_id lg4573_of_match[] = { 
diff --git a/drivers/gpu/drm/panel/panel-nec-nl8048hl11.c b/drivers/gpu/drm/panel/panel-nec-nl8048hl11.c index 6e5ab1debc8b4..81c5c541a351c 100644 --- a/drivers/gpu/drm/panel/panel-nec-nl8048hl11.c +++ b/drivers/gpu/drm/panel/panel-nec-nl8048hl11.c @@ -212,15 +212,13 @@ static int nl8048_probe(struct spi_device *spi) return 0; } -static int nl8048_remove(struct spi_device *spi) +static void nl8048_remove(struct spi_device *spi) { struct nl8048_panel *lcd = spi_get_drvdata(spi); drm_panel_remove(&lcd->panel); drm_panel_disable(&lcd->panel); drm_panel_unprepare(&lcd->panel); - - return 0; } static const struct of_device_id nl8048_of_match[] = { diff --git a/drivers/gpu/drm/panel/panel-novatek-nt39016.c b/drivers/gpu/drm/panel/panel-novatek-nt39016.c index d036853db865f..f58cfb10b58a2 100644 --- a/drivers/gpu/drm/panel/panel-novatek-nt39016.c +++ b/drivers/gpu/drm/panel/panel-novatek-nt39016.c @@ -292,7 +292,7 @@ static int nt39016_probe(struct spi_device *spi) return 0; } -static int nt39016_remove(struct spi_device *spi) +static void nt39016_remove(struct spi_device *spi) { struct nt39016 *panel = spi_get_drvdata(spi); @@ -300,8 +300,6 @@ static int nt39016_remove(struct spi_device *spi) nt39016_disable(&panel->drm_panel); nt39016_unprepare(&panel->drm_panel); - - return 0; } static const struct drm_display_mode kd035g6_display_modes[] = { diff --git a/drivers/gpu/drm/panel/panel-samsung-db7430.c b/drivers/gpu/drm/panel/panel-samsung-db7430.c index ead479719f008..04640c5256a84 100644 --- a/drivers/gpu/drm/panel/panel-samsung-db7430.c +++ b/drivers/gpu/drm/panel/panel-samsung-db7430.c @@ -314,12 +314,11 @@ static int db7430_probe(struct spi_device *spi) return 0; } -static int db7430_remove(struct spi_device *spi) +static void db7430_remove(struct spi_device *spi) { struct db7430 *db = spi_get_drvdata(spi); drm_panel_remove(&db->panel); - return 0; } /* diff --git a/drivers/gpu/drm/panel/panel-samsung-ld9040.c b/drivers/gpu/drm/panel/panel-samsung-ld9040.c index 
c4b388850a13e..01eb211f32f75 100644 --- a/drivers/gpu/drm/panel/panel-samsung-ld9040.c +++ b/drivers/gpu/drm/panel/panel-samsung-ld9040.c @@ -358,14 +358,12 @@ static int ld9040_probe(struct spi_device *spi) return 0; } -static int ld9040_remove(struct spi_device *spi) +static void ld9040_remove(struct spi_device *spi) { struct ld9040 *ctx = spi_get_drvdata(spi); ld9040_power_off(ctx); drm_panel_remove(&ctx->panel); - - return 0; } static const struct of_device_id ld9040_of_match[] = { diff --git a/drivers/gpu/drm/panel/panel-samsung-s6d27a1.c b/drivers/gpu/drm/panel/panel-samsung-s6d27a1.c index 1696ceb36aa02..2adb223a895c9 100644 --- a/drivers/gpu/drm/panel/panel-samsung-s6d27a1.c +++ b/drivers/gpu/drm/panel/panel-samsung-s6d27a1.c @@ -291,12 +291,11 @@ static int s6d27a1_probe(struct spi_device *spi) return 0; } -static int s6d27a1_remove(struct spi_device *spi) +static void s6d27a1_remove(struct spi_device *spi) { struct s6d27a1 *ctx = spi_get_drvdata(spi); drm_panel_remove(&ctx->panel); - return 0; } static const struct of_device_id s6d27a1_match[] = { diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e63m0-spi.c b/drivers/gpu/drm/panel/panel-samsung-s6e63m0-spi.c index c178d962b0d51..d99afcc672ca1 100644 --- a/drivers/gpu/drm/panel/panel-samsung-s6e63m0-spi.c +++ b/drivers/gpu/drm/panel/panel-samsung-s6e63m0-spi.c @@ -62,10 +62,9 @@ static int s6e63m0_spi_probe(struct spi_device *spi) s6e63m0_spi_dcs_write, false); } -static int s6e63m0_spi_remove(struct spi_device *spi) +static void s6e63m0_spi_remove(struct spi_device *spi) { s6e63m0_remove(&spi->dev); - return 0; } static const struct of_device_id s6e63m0_spi_of_match[] = { diff --git a/drivers/gpu/drm/panel/panel-sitronix-st7789v.c b/drivers/gpu/drm/panel/panel-sitronix-st7789v.c index 61e565524542c..bbc4569cbcdc2 100644 --- a/drivers/gpu/drm/panel/panel-sitronix-st7789v.c +++ b/drivers/gpu/drm/panel/panel-sitronix-st7789v.c @@ -387,13 +387,11 @@ static int st7789v_probe(struct spi_device *spi) return 0; 
} -static int st7789v_remove(struct spi_device *spi) +static void st7789v_remove(struct spi_device *spi) { struct st7789v *ctx = spi_get_drvdata(spi); drm_panel_remove(&ctx->panel); - - return 0; } static const struct of_device_id st7789v_of_match[] = { diff --git a/drivers/gpu/drm/panel/panel-sony-acx565akm.c b/drivers/gpu/drm/panel/panel-sony-acx565akm.c index ba0b3ead150f9..0d7541a33f87f 100644 --- a/drivers/gpu/drm/panel/panel-sony-acx565akm.c +++ b/drivers/gpu/drm/panel/panel-sony-acx565akm.c @@ -655,7 +655,7 @@ static int acx565akm_probe(struct spi_device *spi) return 0; } -static int acx565akm_remove(struct spi_device *spi) +static void acx565akm_remove(struct spi_device *spi) { struct acx565akm_panel *lcd = spi_get_drvdata(spi); @@ -666,8 +666,6 @@ static int acx565akm_remove(struct spi_device *spi) drm_panel_disable(&lcd->panel); drm_panel_unprepare(&lcd->panel); - - return 0; } static const struct of_device_id acx565akm_of_match[] = { diff --git a/drivers/gpu/drm/panel/panel-tpo-td028ttec1.c b/drivers/gpu/drm/panel/panel-tpo-td028ttec1.c index ba0c00d1a001c..4dbf8b88f264f 100644 --- a/drivers/gpu/drm/panel/panel-tpo-td028ttec1.c +++ b/drivers/gpu/drm/panel/panel-tpo-td028ttec1.c @@ -350,15 +350,13 @@ static int td028ttec1_probe(struct spi_device *spi) return 0; } -static int td028ttec1_remove(struct spi_device *spi) +static void td028ttec1_remove(struct spi_device *spi) { struct td028ttec1_panel *lcd = spi_get_drvdata(spi); drm_panel_remove(&lcd->panel); drm_panel_disable(&lcd->panel); drm_panel_unprepare(&lcd->panel); - - return 0; } static const struct of_device_id td028ttec1_of_match[] = { diff --git a/drivers/gpu/drm/panel/panel-tpo-td043mtea1.c b/drivers/gpu/drm/panel/panel-tpo-td043mtea1.c index 1866cdb8f9c1d..cf4609bb9b1da 100644 --- a/drivers/gpu/drm/panel/panel-tpo-td043mtea1.c +++ b/drivers/gpu/drm/panel/panel-tpo-td043mtea1.c @@ -463,7 +463,7 @@ static int td043mtea1_probe(struct spi_device *spi) return 0; } -static int td043mtea1_remove(struct 
spi_device *spi) +static void td043mtea1_remove(struct spi_device *spi) { struct td043mtea1_panel *lcd = spi_get_drvdata(spi); @@ -472,8 +472,6 @@ static int td043mtea1_remove(struct spi_device *spi) drm_panel_unprepare(&lcd->panel); sysfs_remove_group(&spi->dev.kobj, &td043mtea1_attr_group); - - return 0; } static const struct of_device_id td043mtea1_of_match[] = { diff --git a/drivers/gpu/drm/panel/panel-tpo-tpg110.c b/drivers/gpu/drm/panel/panel-tpo-tpg110.c index e3791dad6830c..0b1f5a11a0554 100644 --- a/drivers/gpu/drm/panel/panel-tpo-tpg110.c +++ b/drivers/gpu/drm/panel/panel-tpo-tpg110.c @@ -450,12 +450,11 @@ static int tpg110_probe(struct spi_device *spi) return 0; } -static int tpg110_remove(struct spi_device *spi) +static void tpg110_remove(struct spi_device *spi) { struct tpg110 *tpg = spi_get_drvdata(spi); drm_panel_remove(&tpg->panel); - return 0; } static const struct of_device_id tpg110_match[] = { diff --git a/drivers/gpu/drm/panel/panel-widechips-ws2401.c b/drivers/gpu/drm/panel/panel-widechips-ws2401.c index 8bc976f54b801..236f3cb2b594e 100644 --- a/drivers/gpu/drm/panel/panel-widechips-ws2401.c +++ b/drivers/gpu/drm/panel/panel-widechips-ws2401.c @@ -407,12 +407,11 @@ static int ws2401_probe(struct spi_device *spi) return 0; } -static int ws2401_remove(struct spi_device *spi) +static void ws2401_remove(struct spi_device *spi) { struct ws2401 *ws = spi_get_drvdata(spi); drm_panel_remove(&ws->panel); - return 0; } /* diff --git a/drivers/gpu/drm/tiny/hx8357d.c b/drivers/gpu/drm/tiny/hx8357d.c index 9b33c05732aa8..ebb025543f8de 100644 --- a/drivers/gpu/drm/tiny/hx8357d.c +++ b/drivers/gpu/drm/tiny/hx8357d.c @@ -263,14 +263,12 @@ static int hx8357d_probe(struct spi_device *spi) return 0; } -static int hx8357d_remove(struct spi_device *spi) +static void hx8357d_remove(struct spi_device *spi) { struct drm_device *drm = spi_get_drvdata(spi); drm_dev_unplug(drm); drm_atomic_helper_shutdown(drm); - - return 0; } static void hx8357d_shutdown(struct 
spi_device *spi) diff --git a/drivers/gpu/drm/tiny/ili9163.c b/drivers/gpu/drm/tiny/ili9163.c index bcc1813512369..fc8ed245b0bc9 100644 --- a/drivers/gpu/drm/tiny/ili9163.c +++ b/drivers/gpu/drm/tiny/ili9163.c @@ -193,14 +193,12 @@ static int ili9163_probe(struct spi_device *spi) return 0; } -static int ili9163_remove(struct spi_device *spi) +static void ili9163_remove(struct spi_device *spi) { struct drm_device *drm = spi_get_drvdata(spi); drm_dev_unplug(drm); drm_atomic_helper_shutdown(drm); - - return 0; } static void ili9163_shutdown(struct spi_device *spi) diff --git a/drivers/gpu/drm/tiny/ili9225.c b/drivers/gpu/drm/tiny/ili9225.c index 976d3209f1647..cc92eb9f2a07b 100644 --- a/drivers/gpu/drm/tiny/ili9225.c +++ b/drivers/gpu/drm/tiny/ili9225.c @@ -411,14 +411,12 @@ static int ili9225_probe(struct spi_device *spi) return 0; } -static int ili9225_remove(struct spi_device *spi) +static void ili9225_remove(struct spi_device *spi) { struct drm_device *drm = spi_get_drvdata(spi); drm_dev_unplug(drm); drm_atomic_helper_shutdown(drm); - - return 0; } static void ili9225_shutdown(struct spi_device *spi) diff --git a/drivers/gpu/drm/tiny/ili9341.c b/drivers/gpu/drm/tiny/ili9341.c index 37e0c33399c8f..5b8cc770ee7b9 100644 --- a/drivers/gpu/drm/tiny/ili9341.c +++ b/drivers/gpu/drm/tiny/ili9341.c @@ -225,14 +225,12 @@ static int ili9341_probe(struct spi_device *spi) return 0; } -static int ili9341_remove(struct spi_device *spi) +static void ili9341_remove(struct spi_device *spi) { struct drm_device *drm = spi_get_drvdata(spi); drm_dev_unplug(drm); drm_atomic_helper_shutdown(drm); - - return 0; } static void ili9341_shutdown(struct spi_device *spi) diff --git a/drivers/gpu/drm/tiny/ili9486.c b/drivers/gpu/drm/tiny/ili9486.c index e9a63f4b2993c..6d655e18e0aa2 100644 --- a/drivers/gpu/drm/tiny/ili9486.c +++ b/drivers/gpu/drm/tiny/ili9486.c @@ -243,14 +243,12 @@ static int ili9486_probe(struct spi_device *spi) return 0; } -static int ili9486_remove(struct spi_device *spi) 
+static void ili9486_remove(struct spi_device *spi) { struct drm_device *drm = spi_get_drvdata(spi); drm_dev_unplug(drm); drm_atomic_helper_shutdown(drm); - - return 0; } static void ili9486_shutdown(struct spi_device *spi) diff --git a/drivers/gpu/drm/tiny/mi0283qt.c b/drivers/gpu/drm/tiny/mi0283qt.c index 023de49e7a8e6..5e060f6910bb4 100644 --- a/drivers/gpu/drm/tiny/mi0283qt.c +++ b/drivers/gpu/drm/tiny/mi0283qt.c @@ -233,14 +233,12 @@ static int mi0283qt_probe(struct spi_device *spi) return 0; } -static int mi0283qt_remove(struct spi_device *spi) +static void mi0283qt_remove(struct spi_device *spi) { struct drm_device *drm = spi_get_drvdata(spi); drm_dev_unplug(drm); drm_atomic_helper_shutdown(drm); - - return 0; } static void mi0283qt_shutdown(struct spi_device *spi) diff --git a/drivers/gpu/drm/tiny/repaper.c b/drivers/gpu/drm/tiny/repaper.c index 97a775c48cea7..beeeb170d0b19 100644 --- a/drivers/gpu/drm/tiny/repaper.c +++ b/drivers/gpu/drm/tiny/repaper.c @@ -1140,14 +1140,12 @@ static int repaper_probe(struct spi_device *spi) return 0; } -static int repaper_remove(struct spi_device *spi) +static void repaper_remove(struct spi_device *spi) { struct drm_device *drm = spi_get_drvdata(spi); drm_dev_unplug(drm); drm_atomic_helper_shutdown(drm); - - return 0; } static void repaper_shutdown(struct spi_device *spi) diff --git a/drivers/gpu/drm/tiny/st7586.c b/drivers/gpu/drm/tiny/st7586.c index 51b9b9fb3ead1..3f38faa1cd8c8 100644 --- a/drivers/gpu/drm/tiny/st7586.c +++ b/drivers/gpu/drm/tiny/st7586.c @@ -360,14 +360,12 @@ static int st7586_probe(struct spi_device *spi) return 0; } -static int st7586_remove(struct spi_device *spi) +static void st7586_remove(struct spi_device *spi) { struct drm_device *drm = spi_get_drvdata(spi); drm_dev_unplug(drm); drm_atomic_helper_shutdown(drm); - - return 0; } static void st7586_shutdown(struct spi_device *spi) diff --git a/drivers/gpu/drm/tiny/st7735r.c b/drivers/gpu/drm/tiny/st7735r.c index fc40dd10efa8f..29d618093e946 100644 
--- a/drivers/gpu/drm/tiny/st7735r.c +++ b/drivers/gpu/drm/tiny/st7735r.c @@ -247,14 +247,12 @@ static int st7735r_probe(struct spi_device *spi) return 0; } -static int st7735r_remove(struct spi_device *spi) +static void st7735r_remove(struct spi_device *spi) { struct drm_device *drm = spi_get_drvdata(spi); drm_dev_unplug(drm); drm_atomic_helper_shutdown(drm); - - return 0; } static void st7735r_shutdown(struct spi_device *spi) diff --git a/drivers/hwmon/adcxx.c b/drivers/hwmon/adcxx.c index e5bc5ce09f4e7..de37bce24fa68 100644 --- a/drivers/hwmon/adcxx.c +++ b/drivers/hwmon/adcxx.c @@ -194,7 +194,7 @@ out_err: return status; } -static int adcxx_remove(struct spi_device *spi) +static void adcxx_remove(struct spi_device *spi) { struct adcxx *adc = spi_get_drvdata(spi); int i; @@ -205,8 +205,6 @@ static int adcxx_remove(struct spi_device *spi) device_remove_file(&spi->dev, &ad_input[i].dev_attr); mutex_unlock(&adc->lock); - - return 0; } static const struct spi_device_id adcxx_ids[] = { diff --git a/drivers/hwmon/adt7310.c b/drivers/hwmon/adt7310.c index c40cac16af683..832d9ec64934d 100644 --- a/drivers/hwmon/adt7310.c +++ b/drivers/hwmon/adt7310.c @@ -88,10 +88,9 @@ static int adt7310_spi_probe(struct spi_device *spi) &adt7310_spi_ops); } -static int adt7310_spi_remove(struct spi_device *spi) +static void adt7310_spi_remove(struct spi_device *spi) { adt7x10_remove(&spi->dev, spi->irq); - return 0; } static const struct spi_device_id adt7310_id[] = { diff --git a/drivers/hwmon/max1111.c b/drivers/hwmon/max1111.c index 5fcfd57df61e1..4c5487aeb3cfc 100644 --- a/drivers/hwmon/max1111.c +++ b/drivers/hwmon/max1111.c @@ -254,7 +254,7 @@ err_remove: return err; } -static int max1111_remove(struct spi_device *spi) +static void max1111_remove(struct spi_device *spi) { struct max1111_data *data = spi_get_drvdata(spi); @@ -265,7 +265,6 @@ static int max1111_remove(struct spi_device *spi) sysfs_remove_group(&spi->dev.kobj, &max1110_attr_group); sysfs_remove_group(&spi->dev.kobj, 
&max1111_attr_group); mutex_destroy(&data->drvdata_lock); - return 0; } static const struct spi_device_id max1111_ids[] = { diff --git a/drivers/hwmon/max31722.c b/drivers/hwmon/max31722.c index 4cf4fe6809a32..93e048ee49557 100644 --- a/drivers/hwmon/max31722.c +++ b/drivers/hwmon/max31722.c @@ -100,7 +100,7 @@ static int max31722_probe(struct spi_device *spi) return 0; } -static int max31722_remove(struct spi_device *spi) +static void max31722_remove(struct spi_device *spi) { struct max31722_data *data = spi_get_drvdata(spi); int ret; @@ -111,8 +111,6 @@ static int max31722_remove(struct spi_device *spi) if (ret) /* There is nothing we can do about this ... */ dev_warn(&spi->dev, "Failed to put device in stand-by mode\n"); - - return 0; } static int __maybe_unused max31722_suspend(struct device *dev) diff --git a/drivers/iio/accel/bma400_spi.c b/drivers/iio/accel/bma400_spi.c index 9f622e37477b4..9040a717b2470 100644 --- a/drivers/iio/accel/bma400_spi.c +++ b/drivers/iio/accel/bma400_spi.c @@ -87,11 +87,9 @@ static int bma400_spi_probe(struct spi_device *spi) return bma400_probe(&spi->dev, regmap, id->name); } -static int bma400_spi_remove(struct spi_device *spi) +static void bma400_spi_remove(struct spi_device *spi) { bma400_remove(&spi->dev); - - return 0; } static const struct spi_device_id bma400_spi_ids[] = { diff --git a/drivers/iio/accel/bmc150-accel-spi.c b/drivers/iio/accel/bmc150-accel-spi.c index 11559567cb398..80007cc2d0446 100644 --- a/drivers/iio/accel/bmc150-accel-spi.c +++ b/drivers/iio/accel/bmc150-accel-spi.c @@ -35,11 +35,9 @@ static int bmc150_accel_probe(struct spi_device *spi) true); } -static int bmc150_accel_remove(struct spi_device *spi) +static void bmc150_accel_remove(struct spi_device *spi) { bmc150_accel_core_remove(&spi->dev); - - return 0; } static const struct acpi_device_id bmc150_accel_acpi_match[] = { diff --git a/drivers/iio/accel/bmi088-accel-spi.c b/drivers/iio/accel/bmi088-accel-spi.c index 758ad2f128962..06d99d9949f3f 100644 
--- a/drivers/iio/accel/bmi088-accel-spi.c +++ b/drivers/iio/accel/bmi088-accel-spi.c @@ -56,11 +56,9 @@ static int bmi088_accel_probe(struct spi_device *spi) true); } -static int bmi088_accel_remove(struct spi_device *spi) +static void bmi088_accel_remove(struct spi_device *spi) { bmi088_accel_core_remove(&spi->dev); - - return 0; } static const struct spi_device_id bmi088_accel_id[] = { diff --git a/drivers/iio/accel/kxsd9-spi.c b/drivers/iio/accel/kxsd9-spi.c index 441e6b764281c..57c451cfb9e5d 100644 --- a/drivers/iio/accel/kxsd9-spi.c +++ b/drivers/iio/accel/kxsd9-spi.c @@ -32,11 +32,9 @@ static int kxsd9_spi_probe(struct spi_device *spi) spi_get_device_id(spi)->name); } -static int kxsd9_spi_remove(struct spi_device *spi) +static void kxsd9_spi_remove(struct spi_device *spi) { kxsd9_common_remove(&spi->dev); - - return 0; } static const struct spi_device_id kxsd9_spi_id[] = { diff --git a/drivers/iio/accel/mma7455_spi.c b/drivers/iio/accel/mma7455_spi.c index ecf690692dcc8..b746031551a39 100644 --- a/drivers/iio/accel/mma7455_spi.c +++ b/drivers/iio/accel/mma7455_spi.c @@ -22,11 +22,9 @@ static int mma7455_spi_probe(struct spi_device *spi) return mma7455_core_probe(&spi->dev, regmap, id->name); } -static int mma7455_spi_remove(struct spi_device *spi) +static void mma7455_spi_remove(struct spi_device *spi) { mma7455_core_remove(&spi->dev); - - return 0; } static const struct spi_device_id mma7455_spi_ids[] = { diff --git a/drivers/iio/accel/sca3000.c b/drivers/iio/accel/sca3000.c index 43ecacbdc95ae..83c81072511ee 100644 --- a/drivers/iio/accel/sca3000.c +++ b/drivers/iio/accel/sca3000.c @@ -1524,7 +1524,7 @@ error_ret: return ret; } -static int sca3000_remove(struct spi_device *spi) +static void sca3000_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct sca3000_state *st = iio_priv(indio_dev); @@ -1535,8 +1535,6 @@ static int sca3000_remove(struct spi_device *spi) sca3000_stop_all_interrupts(st); if (spi->irq) 
free_irq(spi->irq, indio_dev); - - return 0; } static const struct spi_device_id sca3000_id[] = { diff --git a/drivers/iio/adc/ad7266.c b/drivers/iio/adc/ad7266.c index 1d345d66742d8..c17d9b5fbaf64 100644 --- a/drivers/iio/adc/ad7266.c +++ b/drivers/iio/adc/ad7266.c @@ -479,7 +479,7 @@ error_disable_reg: return ret; } -static int ad7266_remove(struct spi_device *spi) +static void ad7266_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct ad7266_state *st = iio_priv(indio_dev); @@ -488,8 +488,6 @@ static int ad7266_remove(struct spi_device *spi) iio_triggered_buffer_cleanup(indio_dev); if (!IS_ERR(st->reg)) regulator_disable(st->reg); - - return 0; } static const struct spi_device_id ad7266_id[] = { diff --git a/drivers/iio/adc/ltc2496.c b/drivers/iio/adc/ltc2496.c index dd956a7c216e1..5a55f79f25749 100644 --- a/drivers/iio/adc/ltc2496.c +++ b/drivers/iio/adc/ltc2496.c @@ -78,13 +78,11 @@ static int ltc2496_probe(struct spi_device *spi) return ltc2497core_probe(dev, indio_dev); } -static int ltc2496_remove(struct spi_device *spi) +static void ltc2496_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); ltc2497core_remove(indio_dev); - - return 0; } static const struct of_device_id ltc2496_of_match[] = { diff --git a/drivers/iio/adc/mcp320x.c b/drivers/iio/adc/mcp320x.c index 8d1cff28cae0a..b4c69acb33e34 100644 --- a/drivers/iio/adc/mcp320x.c +++ b/drivers/iio/adc/mcp320x.c @@ -459,15 +459,13 @@ reg_disable: return ret; } -static int mcp320x_remove(struct spi_device *spi) +static void mcp320x_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct mcp320x *adc = iio_priv(indio_dev); iio_device_unregister(indio_dev); regulator_disable(adc->reg); - - return 0; } static const struct of_device_id mcp320x_dt_ids[] = { diff --git a/drivers/iio/adc/mcp3911.c b/drivers/iio/adc/mcp3911.c index 13535f148c4c6..1cb4590fe4125 100644 --- a/drivers/iio/adc/mcp3911.c +++ 
b/drivers/iio/adc/mcp3911.c @@ -321,7 +321,7 @@ reg_disable: return ret; } -static int mcp3911_remove(struct spi_device *spi) +static void mcp3911_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct mcp3911 *adc = iio_priv(indio_dev); @@ -331,8 +331,6 @@ static int mcp3911_remove(struct spi_device *spi) clk_disable_unprepare(adc->clki); if (adc->vref) regulator_disable(adc->vref); - - return 0; } static const struct of_device_id mcp3911_dt_ids[] = { diff --git a/drivers/iio/adc/ti-adc12138.c b/drivers/iio/adc/ti-adc12138.c index 6eb62b564dae2..59d75d09604f3 100644 --- a/drivers/iio/adc/ti-adc12138.c +++ b/drivers/iio/adc/ti-adc12138.c @@ -503,7 +503,7 @@ err_clk_disable: return ret; } -static int adc12138_remove(struct spi_device *spi) +static void adc12138_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct adc12138 *adc = iio_priv(indio_dev); @@ -514,8 +514,6 @@ static int adc12138_remove(struct spi_device *spi) regulator_disable(adc->vref_n); regulator_disable(adc->vref_p); clk_disable_unprepare(adc->cclk); - - return 0; } static const struct of_device_id adc12138_dt_ids[] = { diff --git a/drivers/iio/adc/ti-ads7950.c b/drivers/iio/adc/ti-ads7950.c index a7efa3eada2c4..e3658b969c5bf 100644 --- a/drivers/iio/adc/ti-ads7950.c +++ b/drivers/iio/adc/ti-ads7950.c @@ -662,7 +662,7 @@ error_destroy_mutex: return ret; } -static int ti_ads7950_remove(struct spi_device *spi) +static void ti_ads7950_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct ti_ads7950_state *st = iio_priv(indio_dev); @@ -672,8 +672,6 @@ static int ti_ads7950_remove(struct spi_device *spi) iio_triggered_buffer_cleanup(indio_dev); regulator_disable(st->reg); mutex_destroy(&st->slock); - - return 0; } static const struct spi_device_id ti_ads7950_id[] = { diff --git a/drivers/iio/adc/ti-ads8688.c b/drivers/iio/adc/ti-ads8688.c index 2e24717d7f553..22c2583eedd0f 100644 --- 
a/drivers/iio/adc/ti-ads8688.c +++ b/drivers/iio/adc/ti-ads8688.c @@ -479,7 +479,7 @@ err_regulator_disable: return ret; } -static int ads8688_remove(struct spi_device *spi) +static void ads8688_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct ads8688_state *st = iio_priv(indio_dev); @@ -489,8 +489,6 @@ static int ads8688_remove(struct spi_device *spi) if (!IS_ERR(st->reg)) regulator_disable(st->reg); - - return 0; } static const struct spi_device_id ads8688_id[] = { diff --git a/drivers/iio/adc/ti-tlc4541.c b/drivers/iio/adc/ti-tlc4541.c index 403b787f9f7ec..2406eda9dfc6a 100644 --- a/drivers/iio/adc/ti-tlc4541.c +++ b/drivers/iio/adc/ti-tlc4541.c @@ -224,7 +224,7 @@ error_disable_reg: return ret; } -static int tlc4541_remove(struct spi_device *spi) +static void tlc4541_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct tlc4541_state *st = iio_priv(indio_dev); @@ -232,8 +232,6 @@ static int tlc4541_remove(struct spi_device *spi) iio_device_unregister(indio_dev); iio_triggered_buffer_cleanup(indio_dev); regulator_disable(st->reg); - - return 0; } static const struct of_device_id tlc4541_dt_ids[] = { diff --git a/drivers/iio/amplifiers/ad8366.c b/drivers/iio/amplifiers/ad8366.c index cfcf18a0bce85..1134ae12e5319 100644 --- a/drivers/iio/amplifiers/ad8366.c +++ b/drivers/iio/amplifiers/ad8366.c @@ -298,7 +298,7 @@ error_disable_reg: return ret; } -static int ad8366_remove(struct spi_device *spi) +static void ad8366_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct ad8366_state *st = iio_priv(indio_dev); @@ -308,8 +308,6 @@ static int ad8366_remove(struct spi_device *spi) if (!IS_ERR(reg)) regulator_disable(reg); - - return 0; } static const struct spi_device_id ad8366_id[] = { diff --git a/drivers/iio/common/ssp_sensors/ssp_dev.c b/drivers/iio/common/ssp_sensors/ssp_dev.c index 1aee871000381..eafaf4529df54 100644 --- 
a/drivers/iio/common/ssp_sensors/ssp_dev.c +++ b/drivers/iio/common/ssp_sensors/ssp_dev.c @@ -586,7 +586,7 @@ err_setup_irq: return ret; } -static int ssp_remove(struct spi_device *spi) +static void ssp_remove(struct spi_device *spi) { struct ssp_data *data = spi_get_drvdata(spi); @@ -608,8 +608,6 @@ static int ssp_remove(struct spi_device *spi) mutex_destroy(&data->pending_lock); mfd_remove_devices(&spi->dev); - - return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/iio/dac/ad5360.c b/drivers/iio/dac/ad5360.c index 2d3b14c407d83..ecbc6a51d60fa 100644 --- a/drivers/iio/dac/ad5360.c +++ b/drivers/iio/dac/ad5360.c @@ -521,7 +521,7 @@ error_free_channels: return ret; } -static int ad5360_remove(struct spi_device *spi) +static void ad5360_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct ad5360_state *st = iio_priv(indio_dev); @@ -531,8 +531,6 @@ static int ad5360_remove(struct spi_device *spi) kfree(indio_dev->channels); regulator_bulk_disable(st->chip_info->num_vrefs, st->vref_reg); - - return 0; } static const struct spi_device_id ad5360_ids[] = { diff --git a/drivers/iio/dac/ad5380.c b/drivers/iio/dac/ad5380.c index e38860a6a9f3f..82e1d9bd773e5 100644 --- a/drivers/iio/dac/ad5380.c +++ b/drivers/iio/dac/ad5380.c @@ -488,11 +488,9 @@ static int ad5380_spi_probe(struct spi_device *spi) return ad5380_probe(&spi->dev, regmap, id->driver_data, id->name); } -static int ad5380_spi_remove(struct spi_device *spi) +static void ad5380_spi_remove(struct spi_device *spi) { ad5380_remove(&spi->dev); - - return 0; } static const struct spi_device_id ad5380_spi_ids[] = { diff --git a/drivers/iio/dac/ad5446.c b/drivers/iio/dac/ad5446.c index 1c9b54c012a7e..14cfabacbea53 100644 --- a/drivers/iio/dac/ad5446.c +++ b/drivers/iio/dac/ad5446.c @@ -491,11 +491,9 @@ static int ad5446_spi_probe(struct spi_device *spi) &ad5446_spi_chip_info[id->driver_data]); } -static int ad5446_spi_remove(struct spi_device *spi) +static void 
ad5446_spi_remove(struct spi_device *spi) { ad5446_remove(&spi->dev); - - return 0; } static struct spi_driver ad5446_spi_driver = { diff --git a/drivers/iio/dac/ad5449.c b/drivers/iio/dac/ad5449.c index f5e93c6acc9d0..bad9bdaafa94d 100644 --- a/drivers/iio/dac/ad5449.c +++ b/drivers/iio/dac/ad5449.c @@ -330,7 +330,7 @@ error_disable_reg: return ret; } -static int ad5449_spi_remove(struct spi_device *spi) +static void ad5449_spi_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct ad5449 *st = iio_priv(indio_dev); @@ -338,8 +338,6 @@ static int ad5449_spi_remove(struct spi_device *spi) iio_device_unregister(indio_dev); regulator_bulk_disable(st->chip_info->num_channels, st->vref_reg); - - return 0; } static const struct spi_device_id ad5449_spi_ids[] = { diff --git a/drivers/iio/dac/ad5504.c b/drivers/iio/dac/ad5504.c index b631261efa97a..8507573aa13e9 100644 --- a/drivers/iio/dac/ad5504.c +++ b/drivers/iio/dac/ad5504.c @@ -336,7 +336,7 @@ error_disable_reg: return ret; } -static int ad5504_remove(struct spi_device *spi) +static void ad5504_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct ad5504_state *st = iio_priv(indio_dev); @@ -345,8 +345,6 @@ static int ad5504_remove(struct spi_device *spi) if (!IS_ERR(st->reg)) regulator_disable(st->reg); - - return 0; } static const struct spi_device_id ad5504_id[] = { diff --git a/drivers/iio/dac/ad5592r.c b/drivers/iio/dac/ad5592r.c index 6bfd7951e18ca..0f7abfa75bec7 100644 --- a/drivers/iio/dac/ad5592r.c +++ b/drivers/iio/dac/ad5592r.c @@ -130,11 +130,9 @@ static int ad5592r_spi_probe(struct spi_device *spi) return ad5592r_probe(&spi->dev, id->name, &ad5592r_rw_ops); } -static int ad5592r_spi_remove(struct spi_device *spi) +static void ad5592r_spi_remove(struct spi_device *spi) { ad5592r_remove(&spi->dev); - - return 0; } static const struct spi_device_id ad5592r_spi_ids[] = { diff --git a/drivers/iio/dac/ad5624r_spi.c 
b/drivers/iio/dac/ad5624r_spi.c index 3c98941b9f99a..371e812850eba 100644 --- a/drivers/iio/dac/ad5624r_spi.c +++ b/drivers/iio/dac/ad5624r_spi.c @@ -293,7 +293,7 @@ error_disable_reg: return ret; } -static int ad5624r_remove(struct spi_device *spi) +static void ad5624r_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct ad5624r_state *st = iio_priv(indio_dev); @@ -301,8 +301,6 @@ static int ad5624r_remove(struct spi_device *spi) iio_device_unregister(indio_dev); if (!IS_ERR(st->reg)) regulator_disable(st->reg); - - return 0; } static const struct spi_device_id ad5624r_id[] = { diff --git a/drivers/iio/dac/ad5686-spi.c b/drivers/iio/dac/ad5686-spi.c index 2628810fdbb1f..d26fb29b6b04d 100644 --- a/drivers/iio/dac/ad5686-spi.c +++ b/drivers/iio/dac/ad5686-spi.c @@ -95,11 +95,9 @@ static int ad5686_spi_probe(struct spi_device *spi) ad5686_spi_write, ad5686_spi_read); } -static int ad5686_spi_remove(struct spi_device *spi) +static void ad5686_spi_remove(struct spi_device *spi) { ad5686_remove(&spi->dev); - - return 0; } static const struct spi_device_id ad5686_spi_id[] = { diff --git a/drivers/iio/dac/ad5761.c b/drivers/iio/dac/ad5761.c index e37e095e94fc2..4cb8471db81e0 100644 --- a/drivers/iio/dac/ad5761.c +++ b/drivers/iio/dac/ad5761.c @@ -394,7 +394,7 @@ disable_regulator_err: return ret; } -static int ad5761_remove(struct spi_device *spi) +static void ad5761_remove(struct spi_device *spi) { struct iio_dev *iio_dev = spi_get_drvdata(spi); struct ad5761_state *st = iio_priv(iio_dev); @@ -403,8 +403,6 @@ static int ad5761_remove(struct spi_device *spi) if (!IS_ERR_OR_NULL(st->vref_reg)) regulator_disable(st->vref_reg); - - return 0; } static const struct spi_device_id ad5761_id[] = { diff --git a/drivers/iio/dac/ad5764.c b/drivers/iio/dac/ad5764.c index ae089b9145cb7..d235a8047ba0c 100644 --- a/drivers/iio/dac/ad5764.c +++ b/drivers/iio/dac/ad5764.c @@ -332,7 +332,7 @@ error_disable_reg: return ret; } -static int 
ad5764_remove(struct spi_device *spi) +static void ad5764_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct ad5764_state *st = iio_priv(indio_dev); @@ -341,8 +341,6 @@ static int ad5764_remove(struct spi_device *spi) if (st->chip_info->int_vref == 0) regulator_bulk_disable(ARRAY_SIZE(st->vref_reg), st->vref_reg); - - return 0; } static const struct spi_device_id ad5764_ids[] = { diff --git a/drivers/iio/dac/ad5791.c b/drivers/iio/dac/ad5791.c index 7b4579d73d18b..2b14914b40500 100644 --- a/drivers/iio/dac/ad5791.c +++ b/drivers/iio/dac/ad5791.c @@ -428,7 +428,7 @@ error_disable_reg_pos: return ret; } -static int ad5791_remove(struct spi_device *spi) +static void ad5791_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct ad5791_state *st = iio_priv(indio_dev); @@ -439,8 +439,6 @@ static int ad5791_remove(struct spi_device *spi) if (!IS_ERR(st->reg_vss)) regulator_disable(st->reg_vss); - - return 0; } static const struct spi_device_id ad5791_id[] = { diff --git a/drivers/iio/dac/ad8801.c b/drivers/iio/dac/ad8801.c index 5ecfdad54dec3..6be35c92d435a 100644 --- a/drivers/iio/dac/ad8801.c +++ b/drivers/iio/dac/ad8801.c @@ -193,7 +193,7 @@ error_disable_vrefh_reg: return ret; } -static int ad8801_remove(struct spi_device *spi) +static void ad8801_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct ad8801_state *state = iio_priv(indio_dev); @@ -202,8 +202,6 @@ static int ad8801_remove(struct spi_device *spi) if (state->vrefl_reg) regulator_disable(state->vrefl_reg); regulator_disable(state->vrefh_reg); - - return 0; } static const struct spi_device_id ad8801_ids[] = { diff --git a/drivers/iio/dac/ltc1660.c b/drivers/iio/dac/ltc1660.c index f6ec9bf5815ea..c76233c9bb72b 100644 --- a/drivers/iio/dac/ltc1660.c +++ b/drivers/iio/dac/ltc1660.c @@ -206,15 +206,13 @@ error_disable_reg: return ret; } -static int ltc1660_remove(struct spi_device *spi) +static void 
ltc1660_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct ltc1660_priv *priv = iio_priv(indio_dev); iio_device_unregister(indio_dev); regulator_disable(priv->vref_reg); - - return 0; } static const struct of_device_id ltc1660_dt_ids[] = { diff --git a/drivers/iio/dac/ltc2632.c b/drivers/iio/dac/ltc2632.c index 53e4b887d372d..aed46c80757e3 100644 --- a/drivers/iio/dac/ltc2632.c +++ b/drivers/iio/dac/ltc2632.c @@ -372,7 +372,7 @@ static int ltc2632_probe(struct spi_device *spi) return iio_device_register(indio_dev); } -static int ltc2632_remove(struct spi_device *spi) +static void ltc2632_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct ltc2632_state *st = iio_priv(indio_dev); @@ -381,8 +381,6 @@ static int ltc2632_remove(struct spi_device *spi) if (st->vref_reg) regulator_disable(st->vref_reg); - - return 0; } static const struct spi_device_id ltc2632_id[] = { diff --git a/drivers/iio/dac/mcp4922.c b/drivers/iio/dac/mcp4922.c index 0ae414ee17166..cb9e60e71b915 100644 --- a/drivers/iio/dac/mcp4922.c +++ b/drivers/iio/dac/mcp4922.c @@ -172,7 +172,7 @@ error_disable_reg: return ret; } -static int mcp4922_remove(struct spi_device *spi) +static void mcp4922_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct mcp4922_state *state; @@ -180,8 +180,6 @@ static int mcp4922_remove(struct spi_device *spi) iio_device_unregister(indio_dev); state = iio_priv(indio_dev); regulator_disable(state->vref_reg); - - return 0; } static const struct spi_device_id mcp4922_id[] = { diff --git a/drivers/iio/dac/ti-dac082s085.c b/drivers/iio/dac/ti-dac082s085.c index 6beda2193683a..4e1156e6deb2d 100644 --- a/drivers/iio/dac/ti-dac082s085.c +++ b/drivers/iio/dac/ti-dac082s085.c @@ -313,7 +313,7 @@ err: return ret; } -static int ti_dac_remove(struct spi_device *spi) +static void ti_dac_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct 
ti_dac_chip *ti_dac = iio_priv(indio_dev); @@ -321,8 +321,6 @@ static int ti_dac_remove(struct spi_device *spi) iio_device_unregister(indio_dev); mutex_destroy(&ti_dac->lock); regulator_disable(ti_dac->vref); - - return 0; } static const struct of_device_id ti_dac_of_id[] = { diff --git a/drivers/iio/dac/ti-dac7311.c b/drivers/iio/dac/ti-dac7311.c index 99f275829ec21..e10d17e60ed39 100644 --- a/drivers/iio/dac/ti-dac7311.c +++ b/drivers/iio/dac/ti-dac7311.c @@ -292,7 +292,7 @@ err: return ret; } -static int ti_dac_remove(struct spi_device *spi) +static void ti_dac_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct ti_dac_chip *ti_dac = iio_priv(indio_dev); @@ -300,7 +300,6 @@ static int ti_dac_remove(struct spi_device *spi) iio_device_unregister(indio_dev); mutex_destroy(&ti_dac->lock); regulator_disable(ti_dac->vref); - return 0; } static const struct of_device_id ti_dac_of_id[] = { diff --git a/drivers/iio/frequency/adf4350.c b/drivers/iio/frequency/adf4350.c index 3d9eba716b691..f3521330f6fb2 100644 --- a/drivers/iio/frequency/adf4350.c +++ b/drivers/iio/frequency/adf4350.c @@ -589,7 +589,7 @@ error_disable_clk: return ret; } -static int adf4350_remove(struct spi_device *spi) +static void adf4350_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct adf4350_state *st = iio_priv(indio_dev); @@ -604,8 +604,6 @@ static int adf4350_remove(struct spi_device *spi) if (!IS_ERR(reg)) regulator_disable(reg); - - return 0; } static const struct of_device_id adf4350_of_match[] = { diff --git a/drivers/iio/gyro/bmg160_spi.c b/drivers/iio/gyro/bmg160_spi.c index 745962e1e4235..fc2e453527b97 100644 --- a/drivers/iio/gyro/bmg160_spi.c +++ b/drivers/iio/gyro/bmg160_spi.c @@ -27,11 +27,9 @@ static int bmg160_spi_probe(struct spi_device *spi) return bmg160_core_probe(&spi->dev, regmap, spi->irq, id->name); } -static int bmg160_spi_remove(struct spi_device *spi) +static void bmg160_spi_remove(struct 
spi_device *spi) { bmg160_core_remove(&spi->dev); - - return 0; } static const struct spi_device_id bmg160_spi_id[] = { diff --git a/drivers/iio/gyro/fxas21002c_spi.c b/drivers/iio/gyro/fxas21002c_spi.c index 77ceebef4e34c..c3ac169facf9b 100644 --- a/drivers/iio/gyro/fxas21002c_spi.c +++ b/drivers/iio/gyro/fxas21002c_spi.c @@ -34,11 +34,9 @@ static int fxas21002c_spi_probe(struct spi_device *spi) return fxas21002c_core_probe(&spi->dev, regmap, spi->irq, id->name); } -static int fxas21002c_spi_remove(struct spi_device *spi) +static void fxas21002c_spi_remove(struct spi_device *spi) { fxas21002c_core_remove(&spi->dev); - - return 0; } static const struct spi_device_id fxas21002c_spi_id[] = { diff --git a/drivers/iio/health/afe4403.c b/drivers/iio/health/afe4403.c index 273f16dcaff84..856ec901b0913 100644 --- a/drivers/iio/health/afe4403.c +++ b/drivers/iio/health/afe4403.c @@ -570,7 +570,7 @@ err_disable_reg: return ret; } -static int afe4403_remove(struct spi_device *spi) +static void afe4403_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct afe4403_data *afe = iio_priv(indio_dev); @@ -586,8 +586,6 @@ static int afe4403_remove(struct spi_device *spi) ret = regulator_disable(afe->regulator); if (ret) dev_warn(afe->dev, "Unable to disable regulator\n"); - - return 0; } static const struct spi_device_id afe4403_ids[] = { diff --git a/drivers/iio/magnetometer/bmc150_magn_spi.c b/drivers/iio/magnetometer/bmc150_magn_spi.c index c6ed3ea8460ae..4c570412d65cc 100644 --- a/drivers/iio/magnetometer/bmc150_magn_spi.c +++ b/drivers/iio/magnetometer/bmc150_magn_spi.c @@ -29,11 +29,9 @@ static int bmc150_magn_spi_probe(struct spi_device *spi) return bmc150_magn_probe(&spi->dev, regmap, spi->irq, id->name); } -static int bmc150_magn_spi_remove(struct spi_device *spi) +static void bmc150_magn_spi_remove(struct spi_device *spi) { bmc150_magn_remove(&spi->dev); - - return 0; } static const struct spi_device_id bmc150_magn_spi_id[] = { diff 
--git a/drivers/iio/magnetometer/hmc5843_spi.c b/drivers/iio/magnetometer/hmc5843_spi.c index 89cf59a62c289..a99dd9b33e95f 100644 --- a/drivers/iio/magnetometer/hmc5843_spi.c +++ b/drivers/iio/magnetometer/hmc5843_spi.c @@ -74,11 +74,9 @@ static int hmc5843_spi_probe(struct spi_device *spi) id->driver_data, id->name); } -static int hmc5843_spi_remove(struct spi_device *spi) +static void hmc5843_spi_remove(struct spi_device *spi) { hmc5843_common_remove(&spi->dev); - - return 0; } static const struct spi_device_id hmc5843_id[] = { diff --git a/drivers/iio/potentiometer/max5487.c b/drivers/iio/potentiometer/max5487.c index 007c2bd324cb1..42723c996c9f4 100644 --- a/drivers/iio/potentiometer/max5487.c +++ b/drivers/iio/potentiometer/max5487.c @@ -112,7 +112,7 @@ static int max5487_spi_probe(struct spi_device *spi) return iio_device_register(indio_dev); } -static int max5487_spi_remove(struct spi_device *spi) +static void max5487_spi_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); int ret; @@ -123,8 +123,6 @@ static int max5487_spi_remove(struct spi_device *spi) ret = max5487_write_cmd(spi, MAX5487_COPY_AB_TO_NV); if (ret) dev_warn(&spi->dev, "Failed to save wiper regs to NV regs\n"); - - return 0; } static const struct spi_device_id max5487_id[] = { diff --git a/drivers/iio/pressure/ms5611_spi.c b/drivers/iio/pressure/ms5611_spi.c index 9fa2dcd71760b..7ccd960ced5db 100644 --- a/drivers/iio/pressure/ms5611_spi.c +++ b/drivers/iio/pressure/ms5611_spi.c @@ -107,11 +107,9 @@ static int ms5611_spi_probe(struct spi_device *spi) spi_get_device_id(spi)->driver_data); } -static int ms5611_spi_remove(struct spi_device *spi) +static void ms5611_spi_remove(struct spi_device *spi) { ms5611_remove(spi_get_drvdata(spi)); - - return 0; } static const struct of_device_id ms5611_spi_matches[] = { diff --git a/drivers/iio/pressure/zpa2326_spi.c b/drivers/iio/pressure/zpa2326_spi.c index 85201a4bae44c..ee8ed77536cac 100644 --- 
a/drivers/iio/pressure/zpa2326_spi.c +++ b/drivers/iio/pressure/zpa2326_spi.c @@ -57,11 +57,9 @@ static int zpa2326_probe_spi(struct spi_device *spi) spi->irq, ZPA2326_DEVICE_ID, regmap); } -static int zpa2326_remove_spi(struct spi_device *spi) +static void zpa2326_remove_spi(struct spi_device *spi) { zpa2326_remove(&spi->dev); - - return 0; } static const struct spi_device_id zpa2326_spi_ids[] = { diff --git a/drivers/input/keyboard/applespi.c b/drivers/input/keyboard/applespi.c index eda1b23002b58..d1f5354d5ea28 100644 --- a/drivers/input/keyboard/applespi.c +++ b/drivers/input/keyboard/applespi.c @@ -1858,7 +1858,7 @@ static void applespi_drain_reads(struct applespi_data *applespi) spin_unlock_irqrestore(&applespi->cmd_msg_lock, flags); } -static int applespi_remove(struct spi_device *spi) +static void applespi_remove(struct spi_device *spi) { struct applespi_data *applespi = spi_get_drvdata(spi); @@ -1871,8 +1871,6 @@ static int applespi_remove(struct spi_device *spi) applespi_drain_reads(applespi); debugfs_remove_recursive(applespi->debugfs_root); - - return 0; } static void applespi_shutdown(struct spi_device *spi) diff --git a/drivers/input/misc/adxl34x-spi.c b/drivers/input/misc/adxl34x-spi.c index 6e51c9bc619f2..91e44d4c66f7a 100644 --- a/drivers/input/misc/adxl34x-spi.c +++ b/drivers/input/misc/adxl34x-spi.c @@ -87,13 +87,11 @@ static int adxl34x_spi_probe(struct spi_device *spi) return 0; } -static int adxl34x_spi_remove(struct spi_device *spi) +static void adxl34x_spi_remove(struct spi_device *spi) { struct adxl34x *ac = spi_get_drvdata(spi); adxl34x_remove(ac); - - return 0; } static int __maybe_unused adxl34x_spi_suspend(struct device *dev) diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index a25a77dd9a32d..bed68a68f3303 100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c @@ -1411,13 +1411,11 @@ static int ads7846_probe(struct spi_device *spi) return 0; } -static int 
ads7846_remove(struct spi_device *spi) +static void ads7846_remove(struct spi_device *spi) { struct ads7846 *ts = spi_get_drvdata(spi); ads7846_stop(ts); - - return 0; } static struct spi_driver ads7846_driver = { diff --git a/drivers/input/touchscreen/cyttsp4_spi.c b/drivers/input/touchscreen/cyttsp4_spi.c index 2aec41eb76b72..5d7db84f2749c 100644 --- a/drivers/input/touchscreen/cyttsp4_spi.c +++ b/drivers/input/touchscreen/cyttsp4_spi.c @@ -164,12 +164,10 @@ static int cyttsp4_spi_probe(struct spi_device *spi) return PTR_ERR_OR_ZERO(ts); } -static int cyttsp4_spi_remove(struct spi_device *spi) +static void cyttsp4_spi_remove(struct spi_device *spi) { struct cyttsp4 *ts = spi_get_drvdata(spi); cyttsp4_remove(ts); - - return 0; } static struct spi_driver cyttsp4_spi_driver = { diff --git a/drivers/input/touchscreen/tsc2005.c b/drivers/input/touchscreen/tsc2005.c index a2f55920b9b2e..555dfe98b3c4c 100644 --- a/drivers/input/touchscreen/tsc2005.c +++ b/drivers/input/touchscreen/tsc2005.c @@ -64,11 +64,9 @@ static int tsc2005_probe(struct spi_device *spi) tsc2005_cmd); } -static int tsc2005_remove(struct spi_device *spi) +static void tsc2005_remove(struct spi_device *spi) { tsc200x_remove(&spi->dev); - - return 0; } #ifdef CONFIG_OF diff --git a/drivers/leds/leds-cr0014114.c b/drivers/leds/leds-cr0014114.c index d03cfd3c0bfbe..c87686bd7c189 100644 --- a/drivers/leds/leds-cr0014114.c +++ b/drivers/leds/leds-cr0014114.c @@ -266,14 +266,12 @@ static int cr0014114_probe(struct spi_device *spi) return 0; } -static int cr0014114_remove(struct spi_device *spi) +static void cr0014114_remove(struct spi_device *spi) { struct cr0014114 *priv = spi_get_drvdata(spi); cancel_delayed_work_sync(&priv->work); mutex_destroy(&priv->lock); - - return 0; } static const struct of_device_id cr0014114_dt_ids[] = { diff --git a/drivers/leds/leds-dac124s085.c b/drivers/leds/leds-dac124s085.c index 20dc9b9d7deac..cf5fb1195f87f 100644 --- a/drivers/leds/leds-dac124s085.c +++ 
b/drivers/leds/leds-dac124s085.c @@ -85,15 +85,13 @@ eledcr: return ret; } -static int dac124s085_remove(struct spi_device *spi) +static void dac124s085_remove(struct spi_device *spi) { struct dac124s085 *dac = spi_get_drvdata(spi); int i; for (i = 0; i < ARRAY_SIZE(dac->leds); i++) led_classdev_unregister(&dac->leds[i].ldev); - - return 0; } static struct spi_driver dac124s085_driver = { diff --git a/drivers/leds/leds-el15203000.c b/drivers/leds/leds-el15203000.c index f9eb59a255705..7e7b617bcd56e 100644 --- a/drivers/leds/leds-el15203000.c +++ b/drivers/leds/leds-el15203000.c @@ -315,13 +315,11 @@ static int el15203000_probe(struct spi_device *spi) return el15203000_probe_dt(priv); } -static int el15203000_remove(struct spi_device *spi) +static void el15203000_remove(struct spi_device *spi) { struct el15203000 *priv = spi_get_drvdata(spi); mutex_destroy(&priv->lock); - - return 0; } static const struct of_device_id el15203000_dt_ids[] = { diff --git a/drivers/leds/leds-spi-byte.c b/drivers/leds/leds-spi-byte.c index f1964c96fb159..2bc5c99daf51a 100644 --- a/drivers/leds/leds-spi-byte.c +++ b/drivers/leds/leds-spi-byte.c @@ -130,13 +130,11 @@ static int spi_byte_probe(struct spi_device *spi) return 0; } -static int spi_byte_remove(struct spi_device *spi) +static void spi_byte_remove(struct spi_device *spi) { struct spi_byte_led *led = spi_get_drvdata(spi); mutex_destroy(&led->mutex); - - return 0; } static struct spi_driver spi_byte_driver = { diff --git a/drivers/media/spi/cxd2880-spi.c b/drivers/media/spi/cxd2880-spi.c index 6f2a66bc87fb8..6be4e5528879f 100644 --- a/drivers/media/spi/cxd2880-spi.c +++ b/drivers/media/spi/cxd2880-spi.c @@ -625,7 +625,7 @@ fail_regulator: return ret; } -static int +static void cxd2880_spi_remove(struct spi_device *spi) { struct cxd2880_dvb_spi *dvb_spi = spi_get_drvdata(spi); @@ -643,8 +643,6 @@ cxd2880_spi_remove(struct spi_device *spi) kfree(dvb_spi); pr_info("cxd2880_spi remove ok.\n"); - - return 0; } static const struct 
spi_device_id cxd2880_spi_id[] = { diff --git a/drivers/media/spi/gs1662.c b/drivers/media/spi/gs1662.c index f86ef1ca12885..75c21a93e6d00 100644 --- a/drivers/media/spi/gs1662.c +++ b/drivers/media/spi/gs1662.c @@ -458,13 +458,11 @@ static int gs_probe(struct spi_device *spi) return ret; } -static int gs_remove(struct spi_device *spi) +static void gs_remove(struct spi_device *spi) { struct v4l2_subdev *sd = spi_get_drvdata(spi); v4l2_device_unregister_subdev(sd); - - return 0; } static struct spi_driver gs_driver = { diff --git a/drivers/media/tuners/msi001.c b/drivers/media/tuners/msi001.c index 44247049a3190..ad6c72c1ed042 100644 --- a/drivers/media/tuners/msi001.c +++ b/drivers/media/tuners/msi001.c @@ -472,7 +472,7 @@ err: return ret; } -static int msi001_remove(struct spi_device *spi) +static void msi001_remove(struct spi_device *spi) { struct v4l2_subdev *sd = spi_get_drvdata(spi); struct msi001_dev *dev = sd_to_msi001_dev(sd); @@ -486,7 +486,6 @@ static int msi001_remove(struct spi_device *spi) v4l2_device_unregister_subdev(&dev->sd); v4l2_ctrl_handler_free(&dev->hdl); kfree(dev); - return 0; } static const struct spi_device_id msi001_id_table[] = { diff --git a/drivers/mfd/arizona-spi.c b/drivers/mfd/arizona-spi.c index 9fe06dda37829..03620c8efe34a 100644 --- a/drivers/mfd/arizona-spi.c +++ b/drivers/mfd/arizona-spi.c @@ -206,13 +206,11 @@ static int arizona_spi_probe(struct spi_device *spi) return arizona_dev_init(arizona); } -static int arizona_spi_remove(struct spi_device *spi) +static void arizona_spi_remove(struct spi_device *spi) { struct arizona *arizona = spi_get_drvdata(spi); arizona_dev_exit(arizona); - - return 0; } static const struct spi_device_id arizona_spi_ids[] = { diff --git a/drivers/mfd/da9052-spi.c b/drivers/mfd/da9052-spi.c index 5faf3766a5e20..b79a57b45c1e8 100644 --- a/drivers/mfd/da9052-spi.c +++ b/drivers/mfd/da9052-spi.c @@ -55,12 +55,11 @@ static int da9052_spi_probe(struct spi_device *spi) return da9052_device_init(da9052, 
id->driver_data); } -static int da9052_spi_remove(struct spi_device *spi) +static void da9052_spi_remove(struct spi_device *spi) { struct da9052 *da9052 = spi_get_drvdata(spi); da9052_device_exit(da9052); - return 0; } static const struct spi_device_id da9052_spi_id[] = { diff --git a/drivers/mfd/ezx-pcap.c b/drivers/mfd/ezx-pcap.c index 70fa18b04ad2b..2280f756f4229 100644 --- a/drivers/mfd/ezx-pcap.c +++ b/drivers/mfd/ezx-pcap.c @@ -392,7 +392,7 @@ static int pcap_add_subdev(struct pcap_chip *pcap, return ret; } -static int ezx_pcap_remove(struct spi_device *spi) +static void ezx_pcap_remove(struct spi_device *spi) { struct pcap_chip *pcap = spi_get_drvdata(spi); unsigned long flags; @@ -412,8 +412,6 @@ static int ezx_pcap_remove(struct spi_device *spi) irq_set_chip_and_handler(i, NULL, NULL); destroy_workqueue(pcap->workqueue); - - return 0; } static int ezx_pcap_probe(struct spi_device *spi) diff --git a/drivers/mfd/madera-spi.c b/drivers/mfd/madera-spi.c index e860f5ff09336..da84eb50e53a3 100644 --- a/drivers/mfd/madera-spi.c +++ b/drivers/mfd/madera-spi.c @@ -112,13 +112,11 @@ static int madera_spi_probe(struct spi_device *spi) return madera_dev_init(madera); } -static int madera_spi_remove(struct spi_device *spi) +static void madera_spi_remove(struct spi_device *spi) { struct madera *madera = spi_get_drvdata(spi); madera_dev_exit(madera); - - return 0; } static const struct spi_device_id madera_spi_ids[] = { diff --git a/drivers/mfd/mc13xxx-spi.c b/drivers/mfd/mc13xxx-spi.c index 4d8913d647e63..f803527e58194 100644 --- a/drivers/mfd/mc13xxx-spi.c +++ b/drivers/mfd/mc13xxx-spi.c @@ -166,10 +166,9 @@ static int mc13xxx_spi_probe(struct spi_device *spi) return mc13xxx_common_init(&spi->dev); } -static int mc13xxx_spi_remove(struct spi_device *spi) +static void mc13xxx_spi_remove(struct spi_device *spi) { mc13xxx_common_exit(&spi->dev); - return 0; } static struct spi_driver mc13xxx_spi_driver = { diff --git a/drivers/mfd/rsmu_spi.c b/drivers/mfd/rsmu_spi.c index 
fec2b4ec477c5..d2f3d8f1e05af 100644 --- a/drivers/mfd/rsmu_spi.c +++ b/drivers/mfd/rsmu_spi.c @@ -220,13 +220,11 @@ static int rsmu_spi_probe(struct spi_device *client) return rsmu_core_init(rsmu); } -static int rsmu_spi_remove(struct spi_device *client) +static void rsmu_spi_remove(struct spi_device *client) { struct rsmu_ddata *rsmu = spi_get_drvdata(client); rsmu_core_exit(rsmu); - - return 0; } static const struct spi_device_id rsmu_spi_id[] = { diff --git a/drivers/mfd/stmpe-spi.c b/drivers/mfd/stmpe-spi.c index 6c5915016be50..ad8055a0e2869 100644 --- a/drivers/mfd/stmpe-spi.c +++ b/drivers/mfd/stmpe-spi.c @@ -102,13 +102,11 @@ stmpe_spi_probe(struct spi_device *spi) return stmpe_probe(&spi_ci, id->driver_data); } -static int stmpe_spi_remove(struct spi_device *spi) +static void stmpe_spi_remove(struct spi_device *spi) { struct stmpe *stmpe = spi_get_drvdata(spi); stmpe_remove(stmpe); - - return 0; } static const struct of_device_id stmpe_spi_of_match[] = { diff --git a/drivers/mfd/tps65912-spi.c b/drivers/mfd/tps65912-spi.c index d701926aa46e6..bba38fbc781df 100644 --- a/drivers/mfd/tps65912-spi.c +++ b/drivers/mfd/tps65912-spi.c @@ -50,13 +50,11 @@ static int tps65912_spi_probe(struct spi_device *spi) return tps65912_device_init(tps); } -static int tps65912_spi_remove(struct spi_device *spi) +static void tps65912_spi_remove(struct spi_device *spi) { struct tps65912 *tps = spi_get_drvdata(spi); tps65912_device_exit(tps); - - return 0; } static const struct spi_device_id tps65912_spi_id_table[] = { diff --git a/drivers/misc/ad525x_dpot-spi.c b/drivers/misc/ad525x_dpot-spi.c index a9e75d80ad362..263055bda48b7 100644 --- a/drivers/misc/ad525x_dpot-spi.c +++ b/drivers/misc/ad525x_dpot-spi.c @@ -90,10 +90,9 @@ static int ad_dpot_spi_probe(struct spi_device *spi) spi_get_device_id(spi)->name); } -static int ad_dpot_spi_remove(struct spi_device *spi) +static void ad_dpot_spi_remove(struct spi_device *spi) { ad_dpot_remove(&spi->dev); - return 0; } static const 
struct spi_device_id ad_dpot_spi_id[] = { diff --git a/drivers/misc/eeprom/eeprom_93xx46.c b/drivers/misc/eeprom/eeprom_93xx46.c index 1f15399e5cb49..b630625b3024b 100644 --- a/drivers/misc/eeprom/eeprom_93xx46.c +++ b/drivers/misc/eeprom/eeprom_93xx46.c @@ -555,14 +555,12 @@ static int eeprom_93xx46_probe(struct spi_device *spi) return 0; } -static int eeprom_93xx46_remove(struct spi_device *spi) +static void eeprom_93xx46_remove(struct spi_device *spi) { struct eeprom_93xx46_dev *edev = spi_get_drvdata(spi); if (!(edev->pdata->flags & EE_READONLY)) device_remove_file(&spi->dev, &dev_attr_erase); - - return 0; } static struct spi_driver eeprom_93xx46_driver = { diff --git a/drivers/misc/lattice-ecp3-config.c b/drivers/misc/lattice-ecp3-config.c index 98828030b5a4d..bac4df2e52314 100644 --- a/drivers/misc/lattice-ecp3-config.c +++ b/drivers/misc/lattice-ecp3-config.c @@ -211,13 +211,11 @@ static int lattice_ecp3_probe(struct spi_device *spi) return 0; } -static int lattice_ecp3_remove(struct spi_device *spi) +static void lattice_ecp3_remove(struct spi_device *spi) { struct fpga_data *data = spi_get_drvdata(spi); wait_for_completion(&data->fw_loaded); - - return 0; } static const struct spi_device_id lattice_ecp3_id[] = { diff --git a/drivers/misc/lis3lv02d/lis3lv02d_spi.c b/drivers/misc/lis3lv02d/lis3lv02d_spi.c index 9e40dfb607425..203a108b8883c 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d_spi.c +++ b/drivers/misc/lis3lv02d/lis3lv02d_spi.c @@ -96,15 +96,13 @@ static int lis302dl_spi_probe(struct spi_device *spi) return lis3lv02d_init_device(&lis3_dev); } -static int lis302dl_spi_remove(struct spi_device *spi) +static void lis302dl_spi_remove(struct spi_device *spi) { struct lis3lv02d *lis3 = spi_get_drvdata(spi); lis3lv02d_joystick_disable(lis3); lis3lv02d_poweroff(lis3); lis3lv02d_remove_fs(&lis3_dev); - - return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c index a576181e9db03..106dd204b1a7f 100644 --- 
a/drivers/mmc/host/mmc_spi.c +++ b/drivers/mmc/host/mmc_spi.c @@ -1489,7 +1489,7 @@ nomem: } -static int mmc_spi_remove(struct spi_device *spi) +static void mmc_spi_remove(struct spi_device *spi) { struct mmc_host *mmc = dev_get_drvdata(&spi->dev); struct mmc_spi_host *host = mmc_priv(mmc); @@ -1507,7 +1507,6 @@ static int mmc_spi_remove(struct spi_device *spi) spi->max_speed_hz = mmc->f_max; mmc_spi_put_pdata(spi); mmc_free_host(mmc); - return 0; } static const struct spi_device_id mmc_spi_dev_ids[] = { diff --git a/drivers/mtd/devices/mchp23k256.c b/drivers/mtd/devices/mchp23k256.c index a8b31bddf14b8..008df9d8898db 100644 --- a/drivers/mtd/devices/mchp23k256.c +++ b/drivers/mtd/devices/mchp23k256.c @@ -209,13 +209,11 @@ static int mchp23k256_probe(struct spi_device *spi) return 0; } -static int mchp23k256_remove(struct spi_device *spi) +static void mchp23k256_remove(struct spi_device *spi) { struct mchp23k256_flash *flash = spi_get_drvdata(spi); WARN_ON(mtd_device_unregister(&flash->mtd)); - - return 0; } static const struct of_device_id mchp23k256_of_table[] = { diff --git a/drivers/mtd/devices/mchp48l640.c b/drivers/mtd/devices/mchp48l640.c index 231a107901960..a3fd426df74be 100644 --- a/drivers/mtd/devices/mchp48l640.c +++ b/drivers/mtd/devices/mchp48l640.c @@ -341,13 +341,11 @@ static int mchp48l640_probe(struct spi_device *spi) return 0; } -static int mchp48l640_remove(struct spi_device *spi) +static void mchp48l640_remove(struct spi_device *spi) { struct mchp48l640_flash *flash = spi_get_drvdata(spi); WARN_ON(mtd_device_unregister(&flash->mtd)); - - return 0; } static const struct of_device_id mchp48l640_of_table[] = { diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c index 734878abaa236..134e273285974 100644 --- a/drivers/mtd/devices/mtd_dataflash.c +++ b/drivers/mtd/devices/mtd_dataflash.c @@ -916,7 +916,7 @@ static int dataflash_probe(struct spi_device *spi) return status; } -static int dataflash_remove(struct 
spi_device *spi) +static void dataflash_remove(struct spi_device *spi) { struct dataflash *flash = spi_get_drvdata(spi); @@ -925,8 +925,6 @@ static int dataflash_remove(struct spi_device *spi) WARN_ON(mtd_device_unregister(&flash->mtd)); kfree(flash); - - return 0; } static struct spi_driver dataflash_driver = { diff --git a/drivers/mtd/devices/sst25l.c b/drivers/mtd/devices/sst25l.c index 7f124c1bfa406..8813994ce9f45 100644 --- a/drivers/mtd/devices/sst25l.c +++ b/drivers/mtd/devices/sst25l.c @@ -398,13 +398,11 @@ static int sst25l_probe(struct spi_device *spi) return 0; } -static int sst25l_remove(struct spi_device *spi) +static void sst25l_remove(struct spi_device *spi) { struct sst25l_flash *flash = spi_get_drvdata(spi); WARN_ON(mtd_device_unregister(&flash->mtd)); - - return 0; } static struct spi_driver sst25l_driver = { diff --git a/drivers/net/can/m_can/tcan4x5x-core.c b/drivers/net/can/m_can/tcan4x5x-core.c index 04687b15b250e..41645a24384ce 100644 --- a/drivers/net/can/m_can/tcan4x5x-core.c +++ b/drivers/net/can/m_can/tcan4x5x-core.c @@ -388,7 +388,7 @@ out_power: return ret; } -static int tcan4x5x_can_remove(struct spi_device *spi) +static void tcan4x5x_can_remove(struct spi_device *spi) { struct tcan4x5x_priv *priv = spi_get_drvdata(spi); @@ -397,8 +397,6 @@ static int tcan4x5x_can_remove(struct spi_device *spi) tcan4x5x_power_enable(priv->power, 0); m_can_class_free_dev(priv->cdev.net); - - return 0; } static const struct of_device_id tcan4x5x_of_match[] = { diff --git a/drivers/net/can/spi/hi311x.c b/drivers/net/can/spi/hi311x.c index cfcc14fe3e42d..664b8f14d7b05 100644 --- a/drivers/net/can/spi/hi311x.c +++ b/drivers/net/can/spi/hi311x.c @@ -948,7 +948,7 @@ static int hi3110_can_probe(struct spi_device *spi) return dev_err_probe(dev, ret, "Probe failed\n"); } -static int hi3110_can_remove(struct spi_device *spi) +static void hi3110_can_remove(struct spi_device *spi) { struct hi3110_priv *priv = spi_get_drvdata(spi); struct net_device *net = 
priv->net; @@ -960,8 +960,6 @@ static int hi3110_can_remove(struct spi_device *spi) clk_disable_unprepare(priv->clk); free_candev(net); - - return 0; } static int __maybe_unused hi3110_can_suspend(struct device *dev) diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c index 025e07cb74397..d23edaf224204 100644 --- a/drivers/net/can/spi/mcp251x.c +++ b/drivers/net/can/spi/mcp251x.c @@ -1427,7 +1427,7 @@ out_free: return ret; } -static int mcp251x_can_remove(struct spi_device *spi) +static void mcp251x_can_remove(struct spi_device *spi) { struct mcp251x_priv *priv = spi_get_drvdata(spi); struct net_device *net = priv->net; @@ -1442,8 +1442,6 @@ static int mcp251x_can_remove(struct spi_device *spi) clk_disable_unprepare(priv->clk); free_candev(net); - - return 0; } static int __maybe_unused mcp251x_can_suspend(struct device *dev) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index b5986df6eca0b..65c9b31666a6a 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -1966,7 +1966,7 @@ static int mcp251xfd_probe(struct spi_device *spi) return err; } -static int mcp251xfd_remove(struct spi_device *spi) +static void mcp251xfd_remove(struct spi_device *spi) { struct mcp251xfd_priv *priv = spi_get_drvdata(spi); struct net_device *ndev = priv->ndev; @@ -1975,8 +1975,6 @@ static int mcp251xfd_remove(struct spi_device *spi) mcp251xfd_unregister(priv); spi->max_speed_hz = priv->spi_max_speed_hz_orig; free_candev(ndev); - - return 0; } static int __maybe_unused mcp251xfd_runtime_suspend(struct device *device) diff --git a/drivers/net/dsa/b53/b53_spi.c b/drivers/net/dsa/b53/b53_spi.c index 2b88f03e52521..0e54b2a0c2118 100644 --- a/drivers/net/dsa/b53/b53_spi.c +++ b/drivers/net/dsa/b53/b53_spi.c @@ -314,7 +314,7 @@ static int b53_spi_probe(struct spi_device *spi) return 0; } -static int b53_spi_remove(struct spi_device *spi) +static void 
b53_spi_remove(struct spi_device *spi) { struct b53_device *dev = spi_get_drvdata(spi); @@ -322,8 +322,6 @@ static int b53_spi_remove(struct spi_device *spi) b53_switch_remove(dev); spi_set_drvdata(spi, NULL); - - return 0; } static void b53_spi_shutdown(struct spi_device *spi) diff --git a/drivers/net/dsa/microchip/ksz8795_spi.c b/drivers/net/dsa/microchip/ksz8795_spi.c index 866767b70d65b..673589dc88ab4 100644 --- a/drivers/net/dsa/microchip/ksz8795_spi.c +++ b/drivers/net/dsa/microchip/ksz8795_spi.c @@ -87,7 +87,7 @@ static int ksz8795_spi_probe(struct spi_device *spi) return 0; } -static int ksz8795_spi_remove(struct spi_device *spi) +static void ksz8795_spi_remove(struct spi_device *spi) { struct ksz_device *dev = spi_get_drvdata(spi); @@ -95,8 +95,6 @@ static int ksz8795_spi_remove(struct spi_device *spi) ksz_switch_remove(dev); spi_set_drvdata(spi, NULL); - - return 0; } static void ksz8795_spi_shutdown(struct spi_device *spi) diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c b/drivers/net/dsa/microchip/ksz9477_spi.c index e3cb0e6c9f6f2..940bb9665f150 100644 --- a/drivers/net/dsa/microchip/ksz9477_spi.c +++ b/drivers/net/dsa/microchip/ksz9477_spi.c @@ -65,7 +65,7 @@ static int ksz9477_spi_probe(struct spi_device *spi) return 0; } -static int ksz9477_spi_remove(struct spi_device *spi) +static void ksz9477_spi_remove(struct spi_device *spi) { struct ksz_device *dev = spi_get_drvdata(spi); @@ -73,8 +73,6 @@ static int ksz9477_spi_remove(struct spi_device *spi) ksz_switch_remove(dev); spi_set_drvdata(spi, NULL); - - return 0; } static void ksz9477_spi_shutdown(struct spi_device *spi) diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index b513713be6101..c2a47c6693b80 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -3346,18 +3346,16 @@ static int sja1105_probe(struct spi_device *spi) return dsa_register_switch(priv->ds); } -static int sja1105_remove(struct spi_device 
*spi) +static void sja1105_remove(struct spi_device *spi) { struct sja1105_private *priv = spi_get_drvdata(spi); if (!priv) - return 0; + return; dsa_unregister_switch(priv->ds); spi_set_drvdata(spi, NULL); - - return 0; } static void sja1105_shutdown(struct spi_device *spi) diff --git a/drivers/net/dsa/vitesse-vsc73xx-spi.c b/drivers/net/dsa/vitesse-vsc73xx-spi.c index 645398901e05e..3110895358d8d 100644 --- a/drivers/net/dsa/vitesse-vsc73xx-spi.c +++ b/drivers/net/dsa/vitesse-vsc73xx-spi.c @@ -159,18 +159,16 @@ static int vsc73xx_spi_probe(struct spi_device *spi) return vsc73xx_probe(&vsc_spi->vsc); } -static int vsc73xx_spi_remove(struct spi_device *spi) +static void vsc73xx_spi_remove(struct spi_device *spi) { struct vsc73xx_spi *vsc_spi = spi_get_drvdata(spi); if (!vsc_spi) - return 0; + return; vsc73xx_remove(&vsc_spi->vsc); spi_set_drvdata(spi, NULL); - - return 0; } static void vsc73xx_spi_shutdown(struct spi_device *spi) diff --git a/drivers/net/ethernet/asix/ax88796c_main.c b/drivers/net/ethernet/asix/ax88796c_main.c index e7a9f98632587..bf70481bb1cad 100644 --- a/drivers/net/ethernet/asix/ax88796c_main.c +++ b/drivers/net/ethernet/asix/ax88796c_main.c @@ -1102,7 +1102,7 @@ err: return ret; } -static int ax88796c_remove(struct spi_device *spi) +static void ax88796c_remove(struct spi_device *spi) { struct ax88796c_device *ax_local = dev_get_drvdata(&spi->dev); struct net_device *ndev = ax_local->ndev; @@ -1112,8 +1112,6 @@ static int ax88796c_remove(struct spi_device *spi) netif_info(ax_local, probe, ndev, "removing network device %s %s\n", dev_driver_string(&spi->dev), dev_name(&spi->dev)); - - return 0; } #ifdef CONFIG_OF diff --git a/drivers/net/ethernet/micrel/ks8851_spi.c b/drivers/net/ethernet/micrel/ks8851_spi.c index 0303e727e99f9..d167d93e4c12f 100644 --- a/drivers/net/ethernet/micrel/ks8851_spi.c +++ b/drivers/net/ethernet/micrel/ks8851_spi.c @@ -452,11 +452,9 @@ static int ks8851_probe_spi(struct spi_device *spi) return 
ks8851_probe_common(netdev, dev, msg_enable); } -static int ks8851_remove_spi(struct spi_device *spi) +static void ks8851_remove_spi(struct spi_device *spi) { ks8851_remove_common(&spi->dev); - - return 0; } static const struct of_device_id ks8851_match_table[] = { diff --git a/drivers/net/ethernet/microchip/enc28j60.c b/drivers/net/ethernet/microchip/enc28j60.c index 634ac7649c43e..db5a3edb4c3c0 100644 --- a/drivers/net/ethernet/microchip/enc28j60.c +++ b/drivers/net/ethernet/microchip/enc28j60.c @@ -1612,15 +1612,13 @@ error_alloc: return ret; } -static int enc28j60_remove(struct spi_device *spi) +static void enc28j60_remove(struct spi_device *spi) { struct enc28j60_net *priv = spi_get_drvdata(spi); unregister_netdev(priv->netdev); free_irq(spi->irq, priv); free_netdev(priv->netdev); - - return 0; } static const struct of_device_id enc28j60_dt_ids[] = { diff --git a/drivers/net/ethernet/microchip/encx24j600.c b/drivers/net/ethernet/microchip/encx24j600.c index b90efc80fb59f..dc1840cb5b10f 100644 --- a/drivers/net/ethernet/microchip/encx24j600.c +++ b/drivers/net/ethernet/microchip/encx24j600.c @@ -1093,7 +1093,7 @@ error_out: return ret; } -static int encx24j600_spi_remove(struct spi_device *spi) +static void encx24j600_spi_remove(struct spi_device *spi) { struct encx24j600_priv *priv = dev_get_drvdata(&spi->dev); @@ -1101,8 +1101,6 @@ static int encx24j600_spi_remove(struct spi_device *spi) kthread_stop(priv->kworker_task); free_netdev(priv->ndev); - - return 0; } static const struct spi_device_id encx24j600_spi_id_table[] = { diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index 955cce644392a..3c5494afd3c04 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -1001,7 +1001,7 @@ qca_spi_probe(struct spi_device *spi) return 0; } -static int +static void qca_spi_remove(struct spi_device *spi) { struct net_device *qcaspi_devs = spi_get_drvdata(spi); @@ -1011,8 +1011,6 @@ 
qca_spi_remove(struct spi_device *spi) unregister_netdev(qcaspi_devs); free_netdev(qcaspi_devs); - - return 0; } static const struct spi_device_id qca_spi_id[] = { diff --git a/drivers/net/ethernet/vertexcom/mse102x.c b/drivers/net/ethernet/vertexcom/mse102x.c index 89a31783fbb48..25739b182ac7b 100644 --- a/drivers/net/ethernet/vertexcom/mse102x.c +++ b/drivers/net/ethernet/vertexcom/mse102x.c @@ -731,7 +731,7 @@ static int mse102x_probe_spi(struct spi_device *spi) return 0; } -static int mse102x_remove_spi(struct spi_device *spi) +static void mse102x_remove_spi(struct spi_device *spi) { struct mse102x_net *mse = dev_get_drvdata(&spi->dev); struct mse102x_net_spi *mses = to_mse102x_spi(mse); @@ -741,8 +741,6 @@ static int mse102x_remove_spi(struct spi_device *spi) mse102x_remove_device_debugfs(mses); unregister_netdev(mse->ndev); - - return 0; } static const struct of_device_id mse102x_match_table[] = { diff --git a/drivers/net/ethernet/wiznet/w5100-spi.c b/drivers/net/ethernet/wiznet/w5100-spi.c index 7779a36da3c85..7c52796273a49 100644 --- a/drivers/net/ethernet/wiznet/w5100-spi.c +++ b/drivers/net/ethernet/wiznet/w5100-spi.c @@ -461,11 +461,9 @@ static int w5100_spi_probe(struct spi_device *spi) return w5100_probe(&spi->dev, ops, priv_size, mac, spi->irq, -EINVAL); } -static int w5100_spi_remove(struct spi_device *spi) +static void w5100_spi_remove(struct spi_device *spi) { w5100_remove(&spi->dev); - - return 0; } static const struct spi_device_id w5100_spi_ids[] = { diff --git a/drivers/net/ieee802154/adf7242.c b/drivers/net/ieee802154/adf7242.c index 7db9cbd0f5ded..6afdf1622944e 100644 --- a/drivers/net/ieee802154/adf7242.c +++ b/drivers/net/ieee802154/adf7242.c @@ -1304,7 +1304,7 @@ err_alloc_wq: return ret; } -static int adf7242_remove(struct spi_device *spi) +static void adf7242_remove(struct spi_device *spi) { struct adf7242_local *lp = spi_get_drvdata(spi); @@ -1316,8 +1316,6 @@ static int adf7242_remove(struct spi_device *spi) 
ieee802154_unregister_hw(lp->hw); mutex_destroy(&lp->bmux); ieee802154_free_hw(lp->hw); - - return 0; } static const struct of_device_id adf7242_of_match[] = { diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index 7d67f41387f55..a4734323dc293 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -1759,7 +1759,7 @@ free_dev: return rc; } -static int at86rf230_remove(struct spi_device *spi) +static void at86rf230_remove(struct spi_device *spi) { struct at86rf230_local *lp = spi_get_drvdata(spi); @@ -1769,8 +1769,6 @@ static int at86rf230_remove(struct spi_device *spi) ieee802154_free_hw(lp->hw); at86rf230_debugfs_remove(); dev_dbg(&spi->dev, "unregistered at86rf230\n"); - - return 0; } static const struct of_device_id at86rf230_of_match[] = { diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index ece6ff6049f66..b499bbe4d48f2 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -831,7 +831,7 @@ static void ca8210_rx_done(struct cas_control *cas_ctl) finish:; } -static int ca8210_remove(struct spi_device *spi_device); +static void ca8210_remove(struct spi_device *spi_device); /** * ca8210_spi_transfer_complete() - Called when a single spi transfer has @@ -3048,7 +3048,7 @@ static void ca8210_test_interface_clear(struct ca8210_priv *priv) * * Return: 0 or linux error code */ -static int ca8210_remove(struct spi_device *spi_device) +static void ca8210_remove(struct spi_device *spi_device) { struct ca8210_priv *priv; struct ca8210_platform_data *pdata; @@ -3088,8 +3088,6 @@ static int ca8210_remove(struct spi_device *spi_device) if (IS_ENABLED(CONFIG_IEEE802154_CA8210_DEBUGFS)) ca8210_test_interface_clear(priv); } - - return 0; } /** diff --git a/drivers/net/ieee802154/cc2520.c b/drivers/net/ieee802154/cc2520.c index 89c046b204e0c..1e1f40f628a02 100644 --- a/drivers/net/ieee802154/cc2520.c +++ b/drivers/net/ieee802154/cc2520.c @@ 
-1213,7 +1213,7 @@ err_hw_init: return ret; } -static int cc2520_remove(struct spi_device *spi) +static void cc2520_remove(struct spi_device *spi) { struct cc2520_private *priv = spi_get_drvdata(spi); @@ -1222,8 +1222,6 @@ static int cc2520_remove(struct spi_device *spi) ieee802154_unregister_hw(priv->hw); ieee802154_free_hw(priv->hw); - - return 0; } static const struct spi_device_id cc2520_ids[] = { diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c index 8dc04e2590b18..a3af52a8e6ddf 100644 --- a/drivers/net/ieee802154/mcr20a.c +++ b/drivers/net/ieee802154/mcr20a.c @@ -1335,7 +1335,7 @@ free_dev: return ret; } -static int mcr20a_remove(struct spi_device *spi) +static void mcr20a_remove(struct spi_device *spi) { struct mcr20a_local *lp = spi_get_drvdata(spi); @@ -1343,8 +1343,6 @@ static int mcr20a_remove(struct spi_device *spi) ieee802154_unregister_hw(lp->hw); ieee802154_free_hw(lp->hw); - - return 0; } static const struct of_device_id mcr20a_of_match[] = { diff --git a/drivers/net/ieee802154/mrf24j40.c b/drivers/net/ieee802154/mrf24j40.c index ff83e00b77af7..ee4cfbf2c5cc0 100644 --- a/drivers/net/ieee802154/mrf24j40.c +++ b/drivers/net/ieee802154/mrf24j40.c @@ -1356,7 +1356,7 @@ err_ret: return ret; } -static int mrf24j40_remove(struct spi_device *spi) +static void mrf24j40_remove(struct spi_device *spi) { struct mrf24j40 *devrec = spi_get_drvdata(spi); @@ -1366,8 +1366,6 @@ static int mrf24j40_remove(struct spi_device *spi) ieee802154_free_hw(devrec->hw); /* TODO: Will ieee802154_free_device() wait until ->xmit() is * complete? 
*/ - - return 0; } static const struct of_device_id mrf24j40_of_match[] = { diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c index 8b5445a724ce5..ff37f8ba6758b 100644 --- a/drivers/net/phy/spi_ks8995.c +++ b/drivers/net/phy/spi_ks8995.c @@ -517,7 +517,7 @@ static int ks8995_probe(struct spi_device *spi) return 0; } -static int ks8995_remove(struct spi_device *spi) +static void ks8995_remove(struct spi_device *spi) { struct ks8995_switch *ks = spi_get_drvdata(spi); @@ -526,8 +526,6 @@ static int ks8995_remove(struct spi_device *spi) /* assert reset */ if (ks->pdata && gpio_is_valid(ks->pdata->reset_gpio)) gpiod_set_value(gpio_to_desc(ks->pdata->reset_gpio), 1); - - return 0; } /* ------------------------------------------------------------------------ */ diff --git a/drivers/net/wan/slic_ds26522.c b/drivers/net/wan/slic_ds26522.c index 8e3b1c717c107..6063552cea9b2 100644 --- a/drivers/net/wan/slic_ds26522.c +++ b/drivers/net/wan/slic_ds26522.c @@ -194,10 +194,9 @@ static int slic_ds26522_init_configure(struct spi_device *spi) return 0; } -static int slic_ds26522_remove(struct spi_device *spi) +static void slic_ds26522_remove(struct spi_device *spi) { pr_info("DS26522 module uninstalled\n"); - return 0; } static int slic_ds26522_probe(struct spi_device *spi) diff --git a/drivers/net/wireless/intersil/p54/p54spi.c b/drivers/net/wireless/intersil/p54/p54spi.c index ab0fe85658518..f99b7ba69fc3d 100644 --- a/drivers/net/wireless/intersil/p54/p54spi.c +++ b/drivers/net/wireless/intersil/p54/p54spi.c @@ -669,7 +669,7 @@ err_free: return ret; } -static int p54spi_remove(struct spi_device *spi) +static void p54spi_remove(struct spi_device *spi) { struct p54s_priv *priv = spi_get_drvdata(spi); @@ -684,8 +684,6 @@ static int p54spi_remove(struct spi_device *spi) mutex_destroy(&priv->mutex); p54_free_common(priv->hw); - - return 0; } diff --git a/drivers/net/wireless/marvell/libertas/if_spi.c b/drivers/net/wireless/marvell/libertas/if_spi.c index 
cd9f8ecf171f3..ff1c7ec8c450b 100644 --- a/drivers/net/wireless/marvell/libertas/if_spi.c +++ b/drivers/net/wireless/marvell/libertas/if_spi.c @@ -1195,7 +1195,7 @@ out: return err; } -static int libertas_spi_remove(struct spi_device *spi) +static void libertas_spi_remove(struct spi_device *spi) { struct if_spi_card *card = spi_get_drvdata(spi); struct lbs_private *priv = card->priv; @@ -1212,8 +1212,6 @@ static int libertas_spi_remove(struct spi_device *spi) if (card->pdata->teardown) card->pdata->teardown(spi); free_if_spi_card(card); - - return 0; } static int if_spi_suspend(struct device *dev) diff --git a/drivers/net/wireless/microchip/wilc1000/spi.c b/drivers/net/wireless/microchip/wilc1000/spi.c index 2c2ed4b09efd5..d2db522893997 100644 --- a/drivers/net/wireless/microchip/wilc1000/spi.c +++ b/drivers/net/wireless/microchip/wilc1000/spi.c @@ -240,7 +240,7 @@ free: return ret; } -static int wilc_bus_remove(struct spi_device *spi) +static void wilc_bus_remove(struct spi_device *spi) { struct wilc *wilc = spi_get_drvdata(spi); struct wilc_spi *spi_priv = wilc->bus_data; @@ -248,8 +248,6 @@ static int wilc_bus_remove(struct spi_device *spi) clk_disable_unprepare(wilc->rtc_clk); wilc_netdev_cleanup(wilc); kfree(spi_priv); - - return 0; } static const struct of_device_id wilc_of_match[] = { diff --git a/drivers/net/wireless/st/cw1200/cw1200_spi.c b/drivers/net/wireless/st/cw1200/cw1200_spi.c index 271ed2ce2d7f9..fe0d220da44d0 100644 --- a/drivers/net/wireless/st/cw1200/cw1200_spi.c +++ b/drivers/net/wireless/st/cw1200/cw1200_spi.c @@ -423,7 +423,7 @@ static int cw1200_spi_probe(struct spi_device *func) } /* Disconnect Function to be called by SPI stack when device is disconnected */ -static int cw1200_spi_disconnect(struct spi_device *func) +static void cw1200_spi_disconnect(struct spi_device *func) { struct hwbus_priv *self = spi_get_drvdata(func); @@ -435,8 +435,6 @@ static int cw1200_spi_disconnect(struct spi_device *func) } } 
cw1200_spi_off(dev_get_platdata(&func->dev)); - - return 0; } static int __maybe_unused cw1200_spi_suspend(struct device *dev) diff --git a/drivers/net/wireless/ti/wl1251/spi.c b/drivers/net/wireless/ti/wl1251/spi.c index 5b894bd6237ee..9df38726e8b0b 100644 --- a/drivers/net/wireless/ti/wl1251/spi.c +++ b/drivers/net/wireless/ti/wl1251/spi.c @@ -327,14 +327,12 @@ out_free: return ret; } -static int wl1251_spi_remove(struct spi_device *spi) +static void wl1251_spi_remove(struct spi_device *spi) { struct wl1251 *wl = spi_get_drvdata(spi); wl1251_free_hw(wl); regulator_disable(wl->vio); - - return 0; } static struct spi_driver wl1251_spi_driver = { diff --git a/drivers/net/wireless/ti/wlcore/spi.c b/drivers/net/wireless/ti/wlcore/spi.c index 354a7e1c3315c..7eae1ec2eb2b3 100644 --- a/drivers/net/wireless/ti/wlcore/spi.c +++ b/drivers/net/wireless/ti/wlcore/spi.c @@ -546,13 +546,11 @@ out_dev_put: return ret; } -static int wl1271_remove(struct spi_device *spi) +static void wl1271_remove(struct spi_device *spi) { struct wl12xx_spi_glue *glue = spi_get_drvdata(spi); platform_device_unregister(glue->core); - - return 0; } static struct spi_driver wl1271_spi_driver = { diff --git a/drivers/nfc/nfcmrvl/spi.c b/drivers/nfc/nfcmrvl/spi.c index 5b833a9a83f80..a38e2fcdfd39f 100644 --- a/drivers/nfc/nfcmrvl/spi.c +++ b/drivers/nfc/nfcmrvl/spi.c @@ -174,12 +174,11 @@ static int nfcmrvl_spi_probe(struct spi_device *spi) return 0; } -static int nfcmrvl_spi_remove(struct spi_device *spi) +static void nfcmrvl_spi_remove(struct spi_device *spi) { struct nfcmrvl_spi_drv_data *drv_data = spi_get_drvdata(spi); nfcmrvl_nci_unregister_dev(drv_data->priv); - return 0; } static const struct of_device_id of_nfcmrvl_spi_match[] __maybe_unused = { diff --git a/drivers/nfc/st-nci/spi.c b/drivers/nfc/st-nci/spi.c index 4e723992e74c2..169eacc0a32ae 100644 --- a/drivers/nfc/st-nci/spi.c +++ b/drivers/nfc/st-nci/spi.c @@ -263,13 +263,11 @@ static int st_nci_spi_probe(struct spi_device *dev) return r; 
} -static int st_nci_spi_remove(struct spi_device *dev) +static void st_nci_spi_remove(struct spi_device *dev) { struct st_nci_spi_phy *phy = spi_get_drvdata(dev); ndlc_remove(phy->ndlc); - - return 0; } static struct spi_device_id st_nci_spi_id_table[] = { diff --git a/drivers/nfc/st95hf/core.c b/drivers/nfc/st95hf/core.c index b23f47936473d..ed704bb772264 100644 --- a/drivers/nfc/st95hf/core.c +++ b/drivers/nfc/st95hf/core.c @@ -1198,7 +1198,7 @@ err_disable_regulator: return ret; } -static int st95hf_remove(struct spi_device *nfc_spi_dev) +static void st95hf_remove(struct spi_device *nfc_spi_dev) { int result = 0; unsigned char reset_cmd = ST95HF_COMMAND_RESET; @@ -1236,8 +1236,6 @@ static int st95hf_remove(struct spi_device *nfc_spi_dev) /* disable regulator */ if (stcontext->st95hf_supply) regulator_disable(stcontext->st95hf_supply); - - return 0; } /* Register as SPI protocol driver */ diff --git a/drivers/nfc/trf7970a.c b/drivers/nfc/trf7970a.c index 29ca9c328df2b..21d68664fe082 100644 --- a/drivers/nfc/trf7970a.c +++ b/drivers/nfc/trf7970a.c @@ -2144,7 +2144,7 @@ err_destroy_lock: return ret; } -static int trf7970a_remove(struct spi_device *spi) +static void trf7970a_remove(struct spi_device *spi) { struct trf7970a *trf = spi_get_drvdata(spi); @@ -2160,8 +2160,6 @@ static int trf7970a_remove(struct spi_device *spi) regulator_disable(trf->regulator); mutex_destroy(&trf->lock); - - return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/platform/chrome/cros_ec_spi.c b/drivers/platform/chrome/cros_ec_spi.c index 713c58687721b..8493af0f680e2 100644 --- a/drivers/platform/chrome/cros_ec_spi.c +++ b/drivers/platform/chrome/cros_ec_spi.c @@ -786,13 +786,11 @@ static int cros_ec_spi_probe(struct spi_device *spi) return 0; } -static int cros_ec_spi_remove(struct spi_device *spi) +static void cros_ec_spi_remove(struct spi_device *spi) { struct cros_ec_device *ec_dev = spi_get_drvdata(spi); cros_ec_unregister(ec_dev); - - return 0; } #ifdef CONFIG_PM_SLEEP diff --git 
a/drivers/platform/olpc/olpc-xo175-ec.c b/drivers/platform/olpc/olpc-xo175-ec.c index 0d46706afd2da..4823bd2819f64 100644 --- a/drivers/platform/olpc/olpc-xo175-ec.c +++ b/drivers/platform/olpc/olpc-xo175-ec.c @@ -648,7 +648,7 @@ static struct olpc_ec_driver olpc_xo175_ec_driver = { .ec_cmd = olpc_xo175_ec_cmd, }; -static int olpc_xo175_ec_remove(struct spi_device *spi) +static void olpc_xo175_ec_remove(struct spi_device *spi) { if (pm_power_off == olpc_xo175_ec_power_off) pm_power_off = NULL; @@ -657,8 +657,6 @@ static int olpc_xo175_ec_remove(struct spi_device *spi) platform_device_unregister(olpc_ec); olpc_ec = NULL; - - return 0; } static int olpc_xo175_ec_probe(struct spi_device *spi) diff --git a/drivers/rtc/rtc-ds1302.c b/drivers/rtc/rtc-ds1302.c index 2f83adef966eb..6d66ab5a8b176 100644 --- a/drivers/rtc/rtc-ds1302.c +++ b/drivers/rtc/rtc-ds1302.c @@ -185,10 +185,9 @@ static int ds1302_probe(struct spi_device *spi) return 0; } -static int ds1302_remove(struct spi_device *spi) +static void ds1302_remove(struct spi_device *spi) { spi_set_drvdata(spi, NULL); - return 0; } #ifdef CONFIG_OF diff --git a/drivers/rtc/rtc-ds1305.c b/drivers/rtc/rtc-ds1305.c index 9ef107b99b658..ed9360486953e 100644 --- a/drivers/rtc/rtc-ds1305.c +++ b/drivers/rtc/rtc-ds1305.c @@ -720,7 +720,7 @@ static int ds1305_probe(struct spi_device *spi) return 0; } -static int ds1305_remove(struct spi_device *spi) +static void ds1305_remove(struct spi_device *spi) { struct ds1305 *ds1305 = spi_get_drvdata(spi); @@ -730,8 +730,6 @@ static int ds1305_remove(struct spi_device *spi) devm_free_irq(&spi->dev, spi->irq, ds1305); cancel_work_sync(&ds1305->work); } - - return 0; } static struct spi_driver ds1305_driver = { diff --git a/drivers/rtc/rtc-ds1343.c b/drivers/rtc/rtc-ds1343.c index f14ed6c96437b..ed5a6ba89a3ee 100644 --- a/drivers/rtc/rtc-ds1343.c +++ b/drivers/rtc/rtc-ds1343.c @@ -434,11 +434,9 @@ static int ds1343_probe(struct spi_device *spi) return 0; } -static int ds1343_remove(struct 
spi_device *spi) +static void ds1343_remove(struct spi_device *spi) { dev_pm_clear_wake_irq(&spi->dev); - - return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/spi/spi-mem.c b/drivers/spi/spi-mem.c index 37f4443ce9a09..e9d83d65873bd 100644 --- a/drivers/spi/spi-mem.c +++ b/drivers/spi/spi-mem.c @@ -854,15 +854,13 @@ static int spi_mem_probe(struct spi_device *spi) return memdrv->probe(mem); } -static int spi_mem_remove(struct spi_device *spi) +static void spi_mem_remove(struct spi_device *spi) { struct spi_mem_driver *memdrv = to_spi_mem_drv(spi->dev.driver); struct spi_mem *mem = spi_get_drvdata(spi); if (memdrv->remove) - return memdrv->remove(mem); - - return 0; + memdrv->remove(mem); } static void spi_mem_shutdown(struct spi_device *spi) diff --git a/drivers/spi/spi-slave-system-control.c b/drivers/spi/spi-slave-system-control.c index 169f3d595f60c..d37cfe995a632 100644 --- a/drivers/spi/spi-slave-system-control.c +++ b/drivers/spi/spi-slave-system-control.c @@ -132,13 +132,12 @@ static int spi_slave_system_control_probe(struct spi_device *spi) return 0; } -static int spi_slave_system_control_remove(struct spi_device *spi) +static void spi_slave_system_control_remove(struct spi_device *spi) { struct spi_slave_system_control_priv *priv = spi_get_drvdata(spi); spi_slave_abort(spi); wait_for_completion(&priv->finished); - return 0; } static struct spi_driver spi_slave_system_control_driver = { diff --git a/drivers/spi/spi-slave-time.c b/drivers/spi/spi-slave-time.c index f2e07a392d686..f56c1afb85340 100644 --- a/drivers/spi/spi-slave-time.c +++ b/drivers/spi/spi-slave-time.c @@ -106,13 +106,12 @@ static int spi_slave_time_probe(struct spi_device *spi) return 0; } -static int spi_slave_time_remove(struct spi_device *spi) +static void spi_slave_time_remove(struct spi_device *spi) { struct spi_slave_time_priv *priv = spi_get_drvdata(spi); spi_slave_abort(spi); wait_for_completion(&priv->finished); - return 0; } static struct spi_driver spi_slave_time_driver = { 
diff --git a/drivers/spi/spi-tle62x0.c b/drivers/spi/spi-tle62x0.c index f8ad0709d0152..a565352f63815 100644 --- a/drivers/spi/spi-tle62x0.c +++ b/drivers/spi/spi-tle62x0.c @@ -288,7 +288,7 @@ static int tle62x0_probe(struct spi_device *spi) return ret; } -static int tle62x0_remove(struct spi_device *spi) +static void tle62x0_remove(struct spi_device *spi) { struct tle62x0_state *st = spi_get_drvdata(spi); int ptr; @@ -298,7 +298,6 @@ static int tle62x0_remove(struct spi_device *spi) device_remove_file(&spi->dev, &dev_attr_status_show); kfree(st); - return 0; } static struct spi_driver tle62x0_driver = { diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 4599b121d7442..ead9a132dcb9c 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -404,15 +404,8 @@ static void spi_remove(struct device *dev) { const struct spi_driver *sdrv = to_spi_driver(dev->driver); - if (sdrv->remove) { - int ret; - - ret = sdrv->remove(to_spi_device(dev)); - if (ret) - dev_warn(dev, - "Failed to unbind driver (%pe), ignoring\n", - ERR_PTR(ret)); - } + if (sdrv->remove) + sdrv->remove(to_spi_device(dev)); dev_pm_domain_detach(dev, true); } diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c index a5cceca8b82b6..9468f74308bd5 100644 --- a/drivers/spi/spidev.c +++ b/drivers/spi/spidev.c @@ -803,7 +803,7 @@ static int spidev_probe(struct spi_device *spi) return status; } -static int spidev_remove(struct spi_device *spi) +static void spidev_remove(struct spi_device *spi) { struct spidev_data *spidev = spi_get_drvdata(spi); @@ -820,8 +820,6 @@ static int spidev_remove(struct spi_device *spi) if (spidev->users == 0) kfree(spidev); mutex_unlock(&device_list_lock); - - return 0; } static struct spi_driver spidev_spi_driver = { diff --git a/drivers/staging/fbtft/fbtft.h b/drivers/staging/fbtft/fbtft.h index 6a7545b5bcd2d..b68f5f9b7c78c 100644 --- a/drivers/staging/fbtft/fbtft.h +++ b/drivers/staging/fbtft/fbtft.h @@ -286,12 +286,11 @@ static int fbtft_driver_probe_spi(struct 
spi_device *spi) \ return fbtft_probe_common(_display, spi, NULL); \ } \ \ -static int fbtft_driver_remove_spi(struct spi_device *spi) \ +static void fbtft_driver_remove_spi(struct spi_device *spi) \ { \ struct fb_info *info = spi_get_drvdata(spi); \ \ fbtft_remove_common(&spi->dev, info); \ - return 0; \ } \ \ static struct spi_driver fbtft_driver_spi_driver = { \ diff --git a/drivers/staging/pi433/pi433_if.c b/drivers/staging/pi433/pi433_if.c index 68c09fa016ed3..1d31c35875e33 100644 --- a/drivers/staging/pi433/pi433_if.c +++ b/drivers/staging/pi433/pi433_if.c @@ -1264,7 +1264,7 @@ RX_failed: return retval; } -static int pi433_remove(struct spi_device *spi) +static void pi433_remove(struct spi_device *spi) { struct pi433_device *device = spi_get_drvdata(spi); @@ -1284,8 +1284,6 @@ static int pi433_remove(struct spi_device *spi) kfree(device->rx_buffer); kfree(device); - - return 0; } static const struct of_device_id pi433_dt_ids[] = { diff --git a/drivers/staging/wfx/bus_spi.c b/drivers/staging/wfx/bus_spi.c index 55ffcd7c42e27..fa0ff66a457df 100644 --- a/drivers/staging/wfx/bus_spi.c +++ b/drivers/staging/wfx/bus_spi.c @@ -232,12 +232,11 @@ static int wfx_spi_probe(struct spi_device *func) return wfx_probe(bus->core); } -static int wfx_spi_remove(struct spi_device *func) +static void wfx_spi_remove(struct spi_device *func) { struct wfx_spi_priv *bus = spi_get_drvdata(func); wfx_release(bus->core); - return 0; } /* For dynamic driver binding, kernel does not use OF to match driver. 
It only diff --git a/drivers/tty/serial/max3100.c b/drivers/tty/serial/max3100.c index 3c92d4e014887..516cff362434d 100644 --- a/drivers/tty/serial/max3100.c +++ b/drivers/tty/serial/max3100.c @@ -805,7 +805,7 @@ static int max3100_probe(struct spi_device *spi) return 0; } -static int max3100_remove(struct spi_device *spi) +static void max3100_remove(struct spi_device *spi) { struct max3100_port *s = spi_get_drvdata(spi); int i; @@ -828,13 +828,12 @@ static int max3100_remove(struct spi_device *spi) for (i = 0; i < MAX_MAX3100; i++) if (max3100s[i]) { mutex_unlock(&max3100s_lock); - return 0; + return; } pr_debug("removing max3100 driver\n"); uart_unregister_driver(&max3100_uart_driver); mutex_unlock(&max3100s_lock); - return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index dde0824b2fa52..3112b4a054485 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -1487,10 +1487,9 @@ static int max310x_spi_probe(struct spi_device *spi) return max310x_probe(&spi->dev, devtype, regmap, spi->irq); } -static int max310x_spi_remove(struct spi_device *spi) +static void max310x_spi_remove(struct spi_device *spi) { max310x_remove(&spi->dev); - return 0; } static const struct spi_device_id max310x_id_table[] = { diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 64e7e6c8145f8..25d67b8c4db7d 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -1440,11 +1440,9 @@ static int sc16is7xx_spi_probe(struct spi_device *spi) return sc16is7xx_probe(&spi->dev, devtype, regmap, spi->irq); } -static int sc16is7xx_spi_remove(struct spi_device *spi) +static void sc16is7xx_spi_remove(struct spi_device *spi) { sc16is7xx_remove(&spi->dev); - - return 0; } static const struct spi_device_id sc16is7xx_spi_id_table[] = { diff --git a/drivers/usb/gadget/udc/max3420_udc.c b/drivers/usb/gadget/udc/max3420_udc.c index d2a2b20cc1ad4..7d9bd16190c09 100644 --- 
a/drivers/usb/gadget/udc/max3420_udc.c +++ b/drivers/usb/gadget/udc/max3420_udc.c @@ -1292,7 +1292,7 @@ del_gadget: return err; } -static int max3420_remove(struct spi_device *spi) +static void max3420_remove(struct spi_device *spi) { struct max3420_udc *udc = spi_get_drvdata(spi); unsigned long flags; @@ -1304,8 +1304,6 @@ static int max3420_remove(struct spi_device *spi) kthread_stop(udc->thread_task); spin_unlock_irqrestore(&udc->lock, flags); - - return 0; } static const struct of_device_id max3420_udc_of_match[] = { diff --git a/drivers/usb/host/max3421-hcd.c b/drivers/usb/host/max3421-hcd.c index 30de85a707fef..99a5523a79fb9 100644 --- a/drivers/usb/host/max3421-hcd.c +++ b/drivers/usb/host/max3421-hcd.c @@ -1926,7 +1926,7 @@ error: return retval; } -static int +static void max3421_remove(struct spi_device *spi) { struct max3421_hcd *max3421_hcd; @@ -1947,7 +1947,6 @@ max3421_remove(struct spi_device *spi) free_irq(spi->irq, hcd); usb_put_hcd(hcd); - return 0; } static const struct of_device_id max3421_of_match_table[] = { diff --git a/drivers/video/backlight/ams369fg06.c b/drivers/video/backlight/ams369fg06.c index 8a4361e95a114..522dd81110b8e 100644 --- a/drivers/video/backlight/ams369fg06.c +++ b/drivers/video/backlight/ams369fg06.c @@ -506,12 +506,11 @@ static int ams369fg06_probe(struct spi_device *spi) return 0; } -static int ams369fg06_remove(struct spi_device *spi) +static void ams369fg06_remove(struct spi_device *spi) { struct ams369fg06 *lcd = spi_get_drvdata(spi); ams369fg06_power(lcd, FB_BLANK_POWERDOWN); - return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/video/backlight/corgi_lcd.c b/drivers/video/backlight/corgi_lcd.c index 33f5d80495e6f..0a57033ae31d1 100644 --- a/drivers/video/backlight/corgi_lcd.c +++ b/drivers/video/backlight/corgi_lcd.c @@ -542,7 +542,7 @@ static int corgi_lcd_probe(struct spi_device *spi) return 0; } -static int corgi_lcd_remove(struct spi_device *spi) +static void corgi_lcd_remove(struct spi_device *spi) { struct 
corgi_lcd *lcd = spi_get_drvdata(spi); @@ -550,7 +550,6 @@ static int corgi_lcd_remove(struct spi_device *spi) lcd->bl_dev->props.brightness = 0; backlight_update_status(lcd->bl_dev); corgi_lcd_set_power(lcd->lcd_dev, FB_BLANK_POWERDOWN); - return 0; } static struct spi_driver corgi_lcd_driver = { diff --git a/drivers/video/backlight/ili922x.c b/drivers/video/backlight/ili922x.c index 328aba9cddad1..e7b6bd827986f 100644 --- a/drivers/video/backlight/ili922x.c +++ b/drivers/video/backlight/ili922x.c @@ -526,10 +526,9 @@ static int ili922x_probe(struct spi_device *spi) return 0; } -static int ili922x_remove(struct spi_device *spi) +static void ili922x_remove(struct spi_device *spi) { ili922x_poweroff(spi); - return 0; } static struct spi_driver ili922x_driver = { diff --git a/drivers/video/backlight/l4f00242t03.c b/drivers/video/backlight/l4f00242t03.c index 46f97d1c3d219..cc763cf15f53e 100644 --- a/drivers/video/backlight/l4f00242t03.c +++ b/drivers/video/backlight/l4f00242t03.c @@ -223,12 +223,11 @@ static int l4f00242t03_probe(struct spi_device *spi) return 0; } -static int l4f00242t03_remove(struct spi_device *spi) +static void l4f00242t03_remove(struct spi_device *spi) { struct l4f00242t03_priv *priv = spi_get_drvdata(spi); l4f00242t03_lcd_power_set(priv->ld, FB_BLANK_POWERDOWN); - return 0; } static void l4f00242t03_shutdown(struct spi_device *spi) diff --git a/drivers/video/backlight/lms501kf03.c b/drivers/video/backlight/lms501kf03.c index f949b66dce1be..5c46df8022bf4 100644 --- a/drivers/video/backlight/lms501kf03.c +++ b/drivers/video/backlight/lms501kf03.c @@ -364,12 +364,11 @@ static int lms501kf03_probe(struct spi_device *spi) return 0; } -static int lms501kf03_remove(struct spi_device *spi) +static void lms501kf03_remove(struct spi_device *spi) { struct lms501kf03 *lcd = spi_get_drvdata(spi); lms501kf03_power(lcd, FB_BLANK_POWERDOWN); - return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/video/backlight/ltv350qv.c 
b/drivers/video/backlight/ltv350qv.c index 5cbf621e48bd3..b6d373af6e3f1 100644 --- a/drivers/video/backlight/ltv350qv.c +++ b/drivers/video/backlight/ltv350qv.c @@ -255,12 +255,11 @@ static int ltv350qv_probe(struct spi_device *spi) return 0; } -static int ltv350qv_remove(struct spi_device *spi) +static void ltv350qv_remove(struct spi_device *spi) { struct ltv350qv *lcd = spi_get_drvdata(spi); ltv350qv_power(lcd, FB_BLANK_POWERDOWN); - return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/video/backlight/tdo24m.c b/drivers/video/backlight/tdo24m.c index 0de044dcafd50..fc6fbaf855943 100644 --- a/drivers/video/backlight/tdo24m.c +++ b/drivers/video/backlight/tdo24m.c @@ -397,12 +397,11 @@ static int tdo24m_probe(struct spi_device *spi) return 0; } -static int tdo24m_remove(struct spi_device *spi) +static void tdo24m_remove(struct spi_device *spi) { struct tdo24m *lcd = spi_get_drvdata(spi); tdo24m_power(lcd, FB_BLANK_POWERDOWN); - return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/video/backlight/tosa_lcd.c b/drivers/video/backlight/tosa_lcd.c index 38765544345b8..23d6c6bf0f543 100644 --- a/drivers/video/backlight/tosa_lcd.c +++ b/drivers/video/backlight/tosa_lcd.c @@ -232,15 +232,13 @@ err_register: return ret; } -static int tosa_lcd_remove(struct spi_device *spi) +static void tosa_lcd_remove(struct spi_device *spi) { struct tosa_lcd_data *data = spi_get_drvdata(spi); i2c_unregister_device(data->i2c); tosa_lcd_tg_off(data); - - return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/video/backlight/vgg2432a4.c b/drivers/video/backlight/vgg2432a4.c index 3567b45f9ba9f..bfc1913e8b55e 100644 --- a/drivers/video/backlight/vgg2432a4.c +++ b/drivers/video/backlight/vgg2432a4.c @@ -233,11 +233,9 @@ static int vgg2432a4_probe(struct spi_device *spi) return 0; } -static int vgg2432a4_remove(struct spi_device *spi) +static void vgg2432a4_remove(struct spi_device *spi) { ili9320_remove(spi_get_drvdata(spi)); - - return 0; } static void vgg2432a4_shutdown(struct 
spi_device *spi) diff --git a/drivers/video/fbdev/omap/lcd_mipid.c b/drivers/video/fbdev/omap/lcd_mipid.c index a75ae0c9b14c7..03cff39d392db 100644 --- a/drivers/video/fbdev/omap/lcd_mipid.c +++ b/drivers/video/fbdev/omap/lcd_mipid.c @@ -570,14 +570,12 @@ static int mipid_spi_probe(struct spi_device *spi) return 0; } -static int mipid_spi_remove(struct spi_device *spi) +static void mipid_spi_remove(struct spi_device *spi) { struct mipid_device *md = dev_get_drvdata(&spi->dev); mipid_disable(&md->panel); kfree(md); - - return 0; } static struct spi_driver mipid_spi_driver = { diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-lgphilips-lb035q02.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-lgphilips-lb035q02.c index 1bec7a4422e80..aab67721263d8 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/panel-lgphilips-lb035q02.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-lgphilips-lb035q02.c @@ -316,7 +316,7 @@ err_gpio: return r; } -static int lb035q02_panel_spi_remove(struct spi_device *spi) +static void lb035q02_panel_spi_remove(struct spi_device *spi) { struct panel_drv_data *ddata = spi_get_drvdata(spi); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -328,8 +328,6 @@ static int lb035q02_panel_spi_remove(struct spi_device *spi) lb035q02_disconnect(dssdev); omap_dss_put_device(in); - - return 0; } static const struct of_device_id lb035q02_of_match[] = { diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-nec-nl8048hl11.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-nec-nl8048hl11.c index dff9ebbadfc0f..be9910ff6e62d 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/panel-nec-nl8048hl11.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-nec-nl8048hl11.c @@ -327,7 +327,7 @@ err_gpio: return r; } -static int nec_8048_remove(struct spi_device *spi) +static void nec_8048_remove(struct spi_device *spi) { struct panel_drv_data *ddata = dev_get_drvdata(&spi->dev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ 
-341,8 +341,6 @@ static int nec_8048_remove(struct spi_device *spi) nec_8048_disconnect(dssdev); omap_dss_put_device(in); - - return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c index 8d8b5ff7d43c8..a909b5385ca5b 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c @@ -857,7 +857,7 @@ err_gpio: return r; } -static int acx565akm_remove(struct spi_device *spi) +static void acx565akm_remove(struct spi_device *spi) { struct panel_drv_data *ddata = dev_get_drvdata(&spi->dev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -874,8 +874,6 @@ static int acx565akm_remove(struct spi_device *spi) acx565akm_disconnect(dssdev); omap_dss_put_device(in); - - return 0; } static const struct of_device_id acx565akm_of_match[] = { diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td028ttec1.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td028ttec1.c index 595ebd8bd5dcb..3c0f887d30926 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td028ttec1.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td028ttec1.c @@ -425,7 +425,7 @@ err_reg: return r; } -static int td028ttec1_panel_remove(struct spi_device *spi) +static void td028ttec1_panel_remove(struct spi_device *spi) { struct panel_drv_data *ddata = dev_get_drvdata(&spi->dev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -439,8 +439,6 @@ static int td028ttec1_panel_remove(struct spi_device *spi) td028ttec1_panel_disconnect(dssdev); omap_dss_put_device(in); - - return 0; } static const struct of_device_id td028ttec1_of_match[] = { diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c index afac1d9445aa2..58bbba7c037fa 100644 --- 
a/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c @@ -564,7 +564,7 @@ err_regulator: return r; } -static int tpo_td043_remove(struct spi_device *spi) +static void tpo_td043_remove(struct spi_device *spi) { struct panel_drv_data *ddata = dev_get_drvdata(&spi->dev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -580,8 +580,6 @@ static int tpo_td043_remove(struct spi_device *spi) omap_dss_put_device(in); sysfs_remove_group(&spi->dev.kobj, &tpo_td043_attr_group); - - return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 7ab3fed7b8043..c84e61b99c7b0 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -280,7 +280,7 @@ struct spi_message; struct spi_driver { const struct spi_device_id *id_table; int (*probe)(struct spi_device *spi); - int (*remove)(struct spi_device *spi); + void (*remove)(struct spi_device *spi); void (*shutdown)(struct spi_device *spi); struct device_driver driver; }; diff --git a/sound/pci/hda/cs35l41_hda_spi.c b/sound/pci/hda/cs35l41_hda_spi.c index 9f8123893cc86..50eb6c0e66588 100644 --- a/sound/pci/hda/cs35l41_hda_spi.c +++ b/sound/pci/hda/cs35l41_hda_spi.c @@ -28,11 +28,9 @@ static int cs35l41_hda_spi_probe(struct spi_device *spi) devm_regmap_init_spi(spi, &cs35l41_regmap_spi)); } -static int cs35l41_hda_spi_remove(struct spi_device *spi) +static void cs35l41_hda_spi_remove(struct spi_device *spi) { cs35l41_hda_remove(&spi->dev); - - return 0; } static const struct spi_device_id cs35l41_hda_spi_id[] = { diff --git a/sound/soc/codecs/adau1761-spi.c b/sound/soc/codecs/adau1761-spi.c index 655689c9778ac..7c9242c2ff94b 100644 --- a/sound/soc/codecs/adau1761-spi.c +++ b/sound/soc/codecs/adau1761-spi.c @@ -45,10 +45,9 @@ static int adau1761_spi_probe(struct spi_device *spi) id->driver_data, adau1761_spi_switch_mode); } -static int adau1761_spi_remove(struct spi_device *spi) +static void 
adau1761_spi_remove(struct spi_device *spi) { adau17x1_remove(&spi->dev); - return 0; } static const struct spi_device_id adau1761_spi_id[] = { diff --git a/sound/soc/codecs/adau1781-spi.c b/sound/soc/codecs/adau1781-spi.c index bb5613574786b..1a09633d5a885 100644 --- a/sound/soc/codecs/adau1781-spi.c +++ b/sound/soc/codecs/adau1781-spi.c @@ -45,10 +45,9 @@ static int adau1781_spi_probe(struct spi_device *spi) id->driver_data, adau1781_spi_switch_mode); } -static int adau1781_spi_remove(struct spi_device *spi) +static void adau1781_spi_remove(struct spi_device *spi) { adau17x1_remove(&spi->dev); - return 0; } static const struct spi_device_id adau1781_spi_id[] = { diff --git a/sound/soc/codecs/cs35l41-spi.c b/sound/soc/codecs/cs35l41-spi.c index 6dfd5459aa207..169221a5b09f7 100644 --- a/sound/soc/codecs/cs35l41-spi.c +++ b/sound/soc/codecs/cs35l41-spi.c @@ -55,13 +55,11 @@ static int cs35l41_spi_probe(struct spi_device *spi) return cs35l41_probe(cs35l41, pdata); } -static int cs35l41_spi_remove(struct spi_device *spi) +static void cs35l41_spi_remove(struct spi_device *spi) { struct cs35l41_private *cs35l41 = spi_get_drvdata(spi); cs35l41_remove(cs35l41); - - return 0; } #ifdef CONFIG_OF diff --git a/sound/soc/codecs/pcm3168a-spi.c b/sound/soc/codecs/pcm3168a-spi.c index ecd379f308e6b..b5b08046f5454 100644 --- a/sound/soc/codecs/pcm3168a-spi.c +++ b/sound/soc/codecs/pcm3168a-spi.c @@ -26,11 +26,9 @@ static int pcm3168a_spi_probe(struct spi_device *spi) return pcm3168a_probe(&spi->dev, regmap); } -static int pcm3168a_spi_remove(struct spi_device *spi) +static void pcm3168a_spi_remove(struct spi_device *spi) { pcm3168a_remove(&spi->dev); - - return 0; } static const struct spi_device_id pcm3168a_spi_id[] = { diff --git a/sound/soc/codecs/pcm512x-spi.c b/sound/soc/codecs/pcm512x-spi.c index 7cf559b47e1c3..4d29e71963800 100644 --- a/sound/soc/codecs/pcm512x-spi.c +++ b/sound/soc/codecs/pcm512x-spi.c @@ -26,10 +26,9 @@ static int pcm512x_spi_probe(struct spi_device *spi) 
return pcm512x_probe(&spi->dev, regmap); } -static int pcm512x_spi_remove(struct spi_device *spi) +static void pcm512x_spi_remove(struct spi_device *spi) { pcm512x_remove(&spi->dev); - return 0; } static const struct spi_device_id pcm512x_spi_id[] = { diff --git a/sound/soc/codecs/tlv320aic32x4-spi.c b/sound/soc/codecs/tlv320aic32x4-spi.c index a8958cd1c6921..03cce8d6404f8 100644 --- a/sound/soc/codecs/tlv320aic32x4-spi.c +++ b/sound/soc/codecs/tlv320aic32x4-spi.c @@ -46,11 +46,9 @@ static int aic32x4_spi_probe(struct spi_device *spi) return aic32x4_probe(&spi->dev, regmap); } -static int aic32x4_spi_remove(struct spi_device *spi) +static void aic32x4_spi_remove(struct spi_device *spi) { aic32x4_remove(&spi->dev); - - return 0; } static const struct spi_device_id aic32x4_spi_id[] = { diff --git a/sound/soc/codecs/tlv320aic3x-spi.c b/sound/soc/codecs/tlv320aic3x-spi.c index 494e844022321..deed6ec7e0816 100644 --- a/sound/soc/codecs/tlv320aic3x-spi.c +++ b/sound/soc/codecs/tlv320aic3x-spi.c @@ -35,11 +35,9 @@ static int aic3x_spi_probe(struct spi_device *spi) return aic3x_probe(&spi->dev, regmap, id->driver_data); } -static int aic3x_spi_remove(struct spi_device *spi) +static void aic3x_spi_remove(struct spi_device *spi) { aic3x_remove(&spi->dev); - - return 0; } static const struct spi_device_id aic3x_spi_id[] = { diff --git a/sound/soc/codecs/wm0010.c b/sound/soc/codecs/wm0010.c index 28b4656c4e149..1bef1c500c8e3 100644 --- a/sound/soc/codecs/wm0010.c +++ b/sound/soc/codecs/wm0010.c @@ -969,7 +969,7 @@ static int wm0010_spi_probe(struct spi_device *spi) return 0; } -static int wm0010_spi_remove(struct spi_device *spi) +static void wm0010_spi_remove(struct spi_device *spi) { struct wm0010_priv *wm0010 = spi_get_drvdata(spi); @@ -980,8 +980,6 @@ static int wm0010_spi_remove(struct spi_device *spi) if (wm0010->irq) free_irq(wm0010->irq, wm0010); - - return 0; } static struct spi_driver wm0010_spi_driver = { diff --git a/sound/soc/codecs/wm8804-spi.c 
b/sound/soc/codecs/wm8804-spi.c index 9a8da1511c34b..628568724c200 100644 --- a/sound/soc/codecs/wm8804-spi.c +++ b/sound/soc/codecs/wm8804-spi.c @@ -24,10 +24,9 @@ static int wm8804_spi_probe(struct spi_device *spi) return wm8804_probe(&spi->dev, regmap); } -static int wm8804_spi_remove(struct spi_device *spi) +static void wm8804_spi_remove(struct spi_device *spi) { wm8804_remove(&spi->dev); - return 0; } static const struct of_device_id wm8804_of_match[] = { diff --git a/sound/spi/at73c213.c b/sound/spi/at73c213.c index 76c0e37a838cf..56d2c712e2571 100644 --- a/sound/spi/at73c213.c +++ b/sound/spi/at73c213.c @@ -1001,7 +1001,7 @@ out: return retval; } -static int snd_at73c213_remove(struct spi_device *spi) +static void snd_at73c213_remove(struct spi_device *spi) { struct snd_card *card = dev_get_drvdata(&spi->dev); struct snd_at73c213 *chip = card->private_data; @@ -1066,8 +1066,6 @@ out: ssc_free(chip->ssc); snd_card_free(card); - - return 0; } #ifdef CONFIG_PM_SLEEP -- GitLab From 1f8863bfb5ca500ea1c7669b16b1931ba27fce20 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 1 Feb 2022 12:02:59 +0000 Subject: [PATCH 0322/1586] genirq: Allow the PM device to originate from irq domain As a preparation to moving the reference to the device used for runtime power management, add a new 'dev' field to the irqdomain structure for that exact purpose. The irq_chip_pm_{get,put}() helpers are made aware of the dual location via a new private helper. No functional change intended. 
Signed-off-by: Marc Zyngier Reviewed-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Tested-by: Tony Lindgren Acked-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20220201120310.878267-2-maz@kernel.org --- include/linux/irqdomain.h | 10 ++++++++++ kernel/irq/chip.c | 23 ++++++++++++++++++----- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index d476405802e97..be25a33293e57 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -151,6 +151,8 @@ struct irq_domain_chip_generic; * @gc: Pointer to a list of generic chips. There is a helper function for * setting up one or more generic chips for interrupt controllers * drivers using the generic chip library which uses this pointer. + * @dev: Pointer to a device that the domain represent, and that will be + * used for power management purposes. * @parent: Pointer to parent irq_domain to support hierarchy irq_domains * * Revmap data, used internally by irq_domain @@ -171,6 +173,7 @@ struct irq_domain { struct fwnode_handle *fwnode; enum irq_domain_bus_token bus_token; struct irq_domain_chip_generic *gc; + struct device *dev; #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY struct irq_domain *parent; #endif @@ -226,6 +229,13 @@ static inline struct device_node *irq_domain_get_of_node(struct irq_domain *d) return to_of_node(d->fwnode); } +static inline void irq_domain_set_pm_device(struct irq_domain *d, + struct device *dev) +{ + if (d) + d->dev = dev; +} + #ifdef CONFIG_IRQ_DOMAIN struct fwnode_handle *__irq_domain_alloc_fwnode(unsigned int type, int id, const char *name, phys_addr_t *pa); diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index c093246630882..a2a12cdbe8725 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -1558,6 +1558,17 @@ int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) return 0; } +static struct device *irq_get_parent_device(struct irq_data *data) +{ + if (data->chip->parent_device) 
+ return data->chip->parent_device; + + if (data->domain) + return data->domain->dev; + + return NULL; +} + /** * irq_chip_pm_get - Enable power for an IRQ chip * @data: Pointer to interrupt specific data @@ -1567,12 +1578,13 @@ int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) */ int irq_chip_pm_get(struct irq_data *data) { + struct device *dev = irq_get_parent_device(data); int retval; - if (IS_ENABLED(CONFIG_PM) && data->chip->parent_device) { - retval = pm_runtime_get_sync(data->chip->parent_device); + if (IS_ENABLED(CONFIG_PM) && dev) { + retval = pm_runtime_get_sync(dev); if (retval < 0) { - pm_runtime_put_noidle(data->chip->parent_device); + pm_runtime_put_noidle(dev); return retval; } } @@ -1590,10 +1602,11 @@ int irq_chip_pm_get(struct irq_data *data) */ int irq_chip_pm_put(struct irq_data *data) { + struct device *dev = irq_get_parent_device(data); int retval = 0; - if (IS_ENABLED(CONFIG_PM) && data->chip->parent_device) - retval = pm_runtime_put(data->chip->parent_device); + if (IS_ENABLED(CONFIG_PM) && dev) + retval = pm_runtime_put(dev); return (retval < 0) ? retval : 0; } -- GitLab From e95f3efdeb499accf2b05333a1eac7862f5a10f6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 1 Feb 2022 12:03:00 +0000 Subject: [PATCH 0323/1586] irqchip/gic: Move PM device over to irq domain Move the reference to the GIC device over to the irq domain. This allows for some localised cleanup. 
Signed-off-by: Marc Zyngier Reviewed-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Acked-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20220201120310.878267-3-maz@kernel.org --- drivers/irqchip/irq-gic.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index b8bb46c65a97a..fb741b42ca2d2 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -1127,13 +1127,12 @@ static const struct irq_domain_ops gic_irq_domain_ops = { .unmap = gic_irq_domain_unmap, }; -static void gic_init_chip(struct gic_chip_data *gic, struct device *dev, - const char *name, bool use_eoimode1) +static void gic_init_chip(struct gic_chip_data *gic, const char *name, + bool use_eoimode1) { /* Initialize irq_chip */ gic->chip = gic_chip; gic->chip.name = name; - gic->chip.parent_device = dev; if (use_eoimode1) { gic->chip.irq_mask = gic_eoimode1_mask_irq; @@ -1268,10 +1267,10 @@ static int __init __gic_init_bases(struct gic_chip_data *gic, if (static_branch_likely(&supports_deactivate_key) && gic == &gic_data[0]) { name = kasprintf(GFP_KERNEL, "GICv2"); - gic_init_chip(gic, NULL, name, true); + gic_init_chip(gic, name, true); } else { name = kasprintf(GFP_KERNEL, "GIC-%d", (int)(gic-&gic_data[0])); - gic_init_chip(gic, NULL, name, false); + gic_init_chip(gic, name, false); } ret = gic_init_bases(gic, handle); @@ -1460,7 +1459,7 @@ int gic_of_init_child(struct device *dev, struct gic_chip_data **gic, int irq) if (!*gic) return -ENOMEM; - gic_init_chip(*gic, dev, dev->of_node->name, false); + gic_init_chip(*gic, dev->of_node->name, false); ret = gic_of_setup(*gic, dev->of_node); if (ret) @@ -1472,6 +1471,7 @@ int gic_of_init_child(struct device *dev, struct gic_chip_data **gic, int irq) return ret; } + irq_domain_set_pm_device((*gic)->domain, dev); irq_set_chained_handler_and_data(irq, gic_handle_cascade_irq, *gic); return 0; -- GitLab From c2ea6b9b03c1acde89c6100aff894e64386e72cc Mon 
Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 1 Feb 2022 12:03:01 +0000 Subject: [PATCH 0324/1586] irqchip/renesas-intc-irqpin: Move PM device over to irq domain Move the reference to the device over to the irq domain. Signed-off-by: Marc Zyngier Reviewed-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Acked-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20220201120310.878267-4-maz@kernel.org --- drivers/irqchip/irq-renesas-intc-irqpin.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-renesas-intc-irqpin.c b/drivers/irqchip/irq-renesas-intc-irqpin.c index 37f9a4499fdb3..e83756aca14e0 100644 --- a/drivers/irqchip/irq-renesas-intc-irqpin.c +++ b/drivers/irqchip/irq-renesas-intc-irqpin.c @@ -508,7 +508,6 @@ static int intc_irqpin_probe(struct platform_device *pdev) irq_chip = &p->irq_chip; irq_chip->name = "intc-irqpin"; - irq_chip->parent_device = dev; irq_chip->irq_mask = disable_fn; irq_chip->irq_unmask = enable_fn; irq_chip->irq_set_type = intc_irqpin_irq_set_type; @@ -523,6 +522,8 @@ static int intc_irqpin_probe(struct platform_device *pdev) goto err0; } + irq_domain_set_pm_device(p->irq_domain, dev); + if (p->shared_irqs) { /* request one shared interrupt */ if (devm_request_irq(dev, p->irq[0].requested_irq, -- GitLab From c3ec838e3a390e62cb71a11041db43d0b3b42c99 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 1 Feb 2022 12:03:02 +0000 Subject: [PATCH 0325/1586] irqchip/renesas-irqc: Move PM device over to irq domain Move the reference to the device over to the irq domain. 
Signed-off-by: Marc Zyngier Reviewed-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Acked-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20220201120310.878267-5-maz@kernel.org --- drivers/irqchip/irq-renesas-irqc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-renesas-irqc.c b/drivers/irqchip/irq-renesas-irqc.c index 909325f88239d..1ee5e9941f671 100644 --- a/drivers/irqchip/irq-renesas-irqc.c +++ b/drivers/irqchip/irq-renesas-irqc.c @@ -188,13 +188,14 @@ static int irqc_probe(struct platform_device *pdev) p->gc->reg_base = p->cpu_int_base; p->gc->chip_types[0].regs.enable = IRQC_EN_SET; p->gc->chip_types[0].regs.disable = IRQC_EN_STS; - p->gc->chip_types[0].chip.parent_device = dev; p->gc->chip_types[0].chip.irq_mask = irq_gc_mask_disable_reg; p->gc->chip_types[0].chip.irq_unmask = irq_gc_unmask_enable_reg; p->gc->chip_types[0].chip.irq_set_type = irqc_irq_set_type; p->gc->chip_types[0].chip.irq_set_wake = irqc_irq_set_wake; p->gc->chip_types[0].chip.flags = IRQCHIP_MASK_ON_SUSPEND; + irq_domain_set_pm_device(p->irq_domain, dev); + /* request interrupts one by one */ for (k = 0; k < p->number_of_irqs; k++) { if (devm_request_irq(dev, p->irq[k].requested_irq, -- GitLab From fb140b9c0fe1109ac2269443216d114751a030c0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 1 Feb 2022 12:03:03 +0000 Subject: [PATCH 0326/1586] irqchip/imx-intmux: Move PM device over to irq domain Move the reference to the device over to the irq domain. This allows the irq_chip structure to be directly used instead of taking a copy for each instance. 
Signed-off-by: Marc Zyngier Acked-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20220201120310.878267-6-maz@kernel.org --- drivers/irqchip/irq-imx-intmux.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/irqchip/irq-imx-intmux.c b/drivers/irqchip/irq-imx-intmux.c index e86ff743e98c3..80aaea82468aa 100644 --- a/drivers/irqchip/irq-imx-intmux.c +++ b/drivers/irqchip/irq-imx-intmux.c @@ -61,7 +61,6 @@ #define CHAN_MAX_NUM 0x8 struct intmux_irqchip_data { - struct irq_chip chip; u32 saved_reg; int chanidx; int irq; @@ -114,7 +113,7 @@ static void imx_intmux_irq_unmask(struct irq_data *d) raw_spin_unlock_irqrestore(&data->lock, flags); } -static struct irq_chip imx_intmux_irq_chip = { +static struct irq_chip imx_intmux_irq_chip __ro_after_init = { .name = "intmux", .irq_mask = imx_intmux_irq_mask, .irq_unmask = imx_intmux_irq_unmask, @@ -126,7 +125,7 @@ static int imx_intmux_irq_map(struct irq_domain *h, unsigned int irq, struct intmux_irqchip_data *data = h->host_data; irq_set_chip_data(irq, data); - irq_set_chip_and_handler(irq, &data->chip, handle_level_irq); + irq_set_chip_and_handler(irq, &imx_intmux_irq_chip, handle_level_irq); return 0; } @@ -241,8 +240,6 @@ static int imx_intmux_probe(struct platform_device *pdev) } for (i = 0; i < channum; i++) { - data->irqchip_data[i].chip = imx_intmux_irq_chip; - data->irqchip_data[i].chip.parent_device = &pdev->dev; data->irqchip_data[i].chanidx = i; data->irqchip_data[i].irq = irq_of_parse_and_map(np, i); @@ -260,6 +257,7 @@ static int imx_intmux_probe(struct platform_device *pdev) goto out; } data->irqchip_data[i].domain = domain; + irq_domain_set_pm_device(domain, &pdev->dev); /* disable all interrupt sources of this channel firstly */ writel_relaxed(0, data->regs + CHANIER(i)); -- GitLab From 4b9558f92036c968119e1de383f604c19b3ca99b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 1 Feb 2022 12:03:04 +0000 Subject: [PATCH 0327/1586] gpio: mt7621: Kill parent_device usage 
This gpio controller sets the parent_device field, but doesn't have any runtime PM functionality. Get rid of it. Signed-off-by: Marc Zyngier Acked-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20220201120310.878267-7-maz@kernel.org --- drivers/gpio/gpio-mt7621.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpio/gpio-mt7621.c b/drivers/gpio/gpio-mt7621.c index ccaad1cb3c2e9..d8a26e503ca5d 100644 --- a/drivers/gpio/gpio-mt7621.c +++ b/drivers/gpio/gpio-mt7621.c @@ -239,7 +239,6 @@ mediatek_gpio_bank_probe(struct device *dev, int bank) rg->chip.offset = bank * MTK_BANK_WIDTH; rg->irq_chip.name = dev_name(dev); - rg->irq_chip.parent_device = dev; rg->irq_chip.irq_unmask = mediatek_gpio_irq_unmask; rg->irq_chip.irq_mask = mediatek_gpio_irq_mask; rg->irq_chip.irq_mask_ack = mediatek_gpio_irq_mask; -- GitLab From 989c78f25ade0af66426b935f1113d4b0fe390c5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 1 Feb 2022 12:03:05 +0000 Subject: [PATCH 0328/1586] gpio: omap: Move PM device over to irq domain Move the reference to the device over to the irq domain. 
Signed-off-by: Marc Zyngier Tested-by: Tony Lindgren Acked-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20220201120310.878267-8-maz@kernel.org --- drivers/gpio/gpio-omap.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index e099c39e0355d..80ddc43fd875b 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -986,7 +986,8 @@ static void omap_gpio_mod_init(struct gpio_bank *bank) writel_relaxed(0, base + bank->regs->ctrl); } -static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc) +static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc, + struct device *pm_dev) { struct gpio_irq_chip *irq; static int gpio; @@ -1052,6 +1053,7 @@ static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc) if (ret) return dev_err_probe(bank->chip.parent, ret, "Could not register gpio chip\n"); + irq_domain_set_pm_device(bank->chip.irq.domain, pm_dev); ret = devm_request_irq(bank->chip.parent, bank->irq, omap_gpio_irq_handler, 0, dev_name(bank->chip.parent), bank); @@ -1402,7 +1404,6 @@ static int omap_gpio_probe(struct platform_device *pdev) irqc->irq_bus_sync_unlock = gpio_irq_bus_sync_unlock, irqc->name = dev_name(&pdev->dev); irqc->flags = IRQCHIP_MASK_ON_SUSPEND; - irqc->parent_device = dev; bank->irq = platform_get_irq(pdev, 0); if (bank->irq <= 0) { @@ -1466,7 +1467,7 @@ static int omap_gpio_probe(struct platform_device *pdev) omap_gpio_mod_init(bank); - ret = omap_gpio_chip_init(bank, irqc); + ret = omap_gpio_chip_init(bank, irqc, dev); if (ret) { pm_runtime_put_sync(dev); pm_runtime_disable(dev); -- GitLab From 373d664b7d3babe1743f64746bc3c553ac23a1bd Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 1 Feb 2022 12:03:06 +0000 Subject: [PATCH 0329/1586] gpio: rcar: Move PM device over to irq domain Move the reference to the device over to the irq domain. 
Signed-off-by: Marc Zyngier Reviewed-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Acked-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20220201120310.878267-9-maz@kernel.org --- drivers/gpio/gpio-rcar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c index bd2e16d6e21c4..3a76538f27fad 100644 --- a/drivers/gpio/gpio-rcar.c +++ b/drivers/gpio/gpio-rcar.c @@ -530,7 +530,6 @@ static int gpio_rcar_probe(struct platform_device *pdev) irq_chip = &p->irq_chip; irq_chip->name = "gpio-rcar"; - irq_chip->parent_device = dev; irq_chip->irq_mask = gpio_rcar_irq_disable; irq_chip->irq_unmask = gpio_rcar_irq_enable; irq_chip->irq_set_type = gpio_rcar_irq_set_type; @@ -552,6 +551,7 @@ static int gpio_rcar_probe(struct platform_device *pdev) goto err0; } + irq_domain_set_pm_device(gpio_chip->irq.domain, dev); ret = devm_request_irq(dev, p->irq_parent, gpio_rcar_irq_handler, IRQF_SHARED, name, p); if (ret) { -- GitLab From 924610607f191bee4379bc3775b0fd025ad7e922 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 1 Feb 2022 12:03:07 +0000 Subject: [PATCH 0330/1586] gpio: tpmx86: Move PM device over to irq domain Move the reference to the device over to the irq domain. 
Signed-off-by: Marc Zyngier Acked-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20220201120310.878267-10-maz@kernel.org --- drivers/gpio/gpio-tqmx86.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-tqmx86.c b/drivers/gpio/gpio-tqmx86.c index 5b103221b58dd..fa4bc7481f9a6 100644 --- a/drivers/gpio/gpio-tqmx86.c +++ b/drivers/gpio/gpio-tqmx86.c @@ -281,7 +281,6 @@ static int tqmx86_gpio_probe(struct platform_device *pdev) u8 irq_status; irq_chip->name = chip->label; - irq_chip->parent_device = &pdev->dev; irq_chip->irq_mask = tqmx86_gpio_irq_mask; irq_chip->irq_unmask = tqmx86_gpio_irq_unmask; irq_chip->irq_set_type = tqmx86_gpio_irq_set_type; @@ -316,6 +315,8 @@ static int tqmx86_gpio_probe(struct platform_device *pdev) goto out_pm_dis; } + irq_domain_set_pm_device(girq->domain, dev); + dev_info(dev, "GPIO functionality initialized with %d pins\n", chip->ngpio); -- GitLab From d335092933079e0a48c61ea5791906d040105a4d Mon Sep 17 00:00:00 2001 From: Alexandre Torgue Date: Wed, 2 Feb 2022 15:00:03 +0100 Subject: [PATCH 0331/1586] dt-bindings: interrupt-controller: stm32-exti: document st,stm32mp13-exti Support of STM32MP13 SoC implies to create a new compatible in order to manage EXTI/GIC mapping changes. 
Signed-off-by: Alexandre Torgue Acked-by: Rob Herring Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220202140005.860-2-alexandre.torgue@foss.st.com --- .../devicetree/bindings/interrupt-controller/st,stm32-exti.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.yaml b/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.yaml index d19c881b4abc2..e44daa09b137c 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.yaml @@ -20,6 +20,7 @@ properties: - items: - enum: - st,stm32mp1-exti + - st,stm32mp13-exti - const: syscon "#interrupt-cells": -- GitLab From 04133bb1e710bc3d5532694999fbb3d0f1421724 Mon Sep 17 00:00:00 2001 From: Alexandre Torgue Date: Wed, 2 Feb 2022 15:00:04 +0100 Subject: [PATCH 0332/1586] irqchip/stm32-exti: Add STM32MP13 support Enhance stm32-exti driver to support STM32MP13 SoC. This SoC uses the same hardware version as STM32MP15. Only EXTI line mapping is changed and the following EXTI lines are supported: GPIO, RTC, I2C[1-5], UxART[1-8], USBH_EHCI, USBH_OHCI, USB_OTG, LPTIM[1-5], ETH[1-2]. 
Signed-off-by: Alexandre Torgue Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220202140005.860-3-alexandre.torgue@foss.st.com --- drivers/irqchip/irq-stm32-exti.c | 50 ++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/drivers/irqchip/irq-stm32-exti.c b/drivers/irqchip/irq-stm32-exti.c index b7cb2da718880..9d18f47040eb7 100644 --- a/drivers/irqchip/irq-stm32-exti.c +++ b/drivers/irqchip/irq-stm32-exti.c @@ -214,6 +214,48 @@ static const struct stm32_desc_irq stm32mp1_desc_irq[] = { { .exti = 73, .irq_parent = 129, .chip = &stm32_exti_h_chip }, }; +static const struct stm32_desc_irq stm32mp13_desc_irq[] = { + { .exti = 0, .irq_parent = 6, .chip = &stm32_exti_h_chip }, + { .exti = 1, .irq_parent = 7, .chip = &stm32_exti_h_chip }, + { .exti = 2, .irq_parent = 8, .chip = &stm32_exti_h_chip }, + { .exti = 3, .irq_parent = 9, .chip = &stm32_exti_h_chip }, + { .exti = 4, .irq_parent = 10, .chip = &stm32_exti_h_chip }, + { .exti = 5, .irq_parent = 24, .chip = &stm32_exti_h_chip }, + { .exti = 6, .irq_parent = 65, .chip = &stm32_exti_h_chip }, + { .exti = 7, .irq_parent = 66, .chip = &stm32_exti_h_chip }, + { .exti = 8, .irq_parent = 67, .chip = &stm32_exti_h_chip }, + { .exti = 9, .irq_parent = 68, .chip = &stm32_exti_h_chip }, + { .exti = 10, .irq_parent = 41, .chip = &stm32_exti_h_chip }, + { .exti = 11, .irq_parent = 43, .chip = &stm32_exti_h_chip }, + { .exti = 12, .irq_parent = 77, .chip = &stm32_exti_h_chip }, + { .exti = 13, .irq_parent = 78, .chip = &stm32_exti_h_chip }, + { .exti = 14, .irq_parent = 106, .chip = &stm32_exti_h_chip }, + { .exti = 15, .irq_parent = 109, .chip = &stm32_exti_h_chip }, + { .exti = 16, .irq_parent = 1, .chip = &stm32_exti_h_chip }, + { .exti = 19, .irq_parent = 3, .chip = &stm32_exti_h_chip_direct }, + { .exti = 21, .irq_parent = 32, .chip = &stm32_exti_h_chip_direct }, + { .exti = 22, .irq_parent = 34, .chip = &stm32_exti_h_chip_direct }, + { .exti = 23, .irq_parent = 73, .chip = 
&stm32_exti_h_chip_direct }, + { .exti = 24, .irq_parent = 93, .chip = &stm32_exti_h_chip_direct }, + { .exti = 25, .irq_parent = 114, .chip = &stm32_exti_h_chip_direct }, + { .exti = 26, .irq_parent = 38, .chip = &stm32_exti_h_chip_direct }, + { .exti = 27, .irq_parent = 39, .chip = &stm32_exti_h_chip_direct }, + { .exti = 28, .irq_parent = 40, .chip = &stm32_exti_h_chip_direct }, + { .exti = 29, .irq_parent = 72, .chip = &stm32_exti_h_chip_direct }, + { .exti = 30, .irq_parent = 53, .chip = &stm32_exti_h_chip_direct }, + { .exti = 31, .irq_parent = 54, .chip = &stm32_exti_h_chip_direct }, + { .exti = 32, .irq_parent = 83, .chip = &stm32_exti_h_chip_direct }, + { .exti = 33, .irq_parent = 84, .chip = &stm32_exti_h_chip_direct }, + { .exti = 44, .irq_parent = 96, .chip = &stm32_exti_h_chip_direct }, + { .exti = 47, .irq_parent = 92, .chip = &stm32_exti_h_chip_direct }, + { .exti = 48, .irq_parent = 116, .chip = &stm32_exti_h_chip_direct }, + { .exti = 50, .irq_parent = 117, .chip = &stm32_exti_h_chip_direct }, + { .exti = 52, .irq_parent = 118, .chip = &stm32_exti_h_chip_direct }, + { .exti = 53, .irq_parent = 119, .chip = &stm32_exti_h_chip_direct }, + { .exti = 68, .irq_parent = 63, .chip = &stm32_exti_h_chip_direct }, + { .exti = 70, .irq_parent = 98, .chip = &stm32_exti_h_chip_direct }, +}; + static const struct stm32_exti_drv_data stm32mp1_drv_data = { .exti_banks = stm32mp1_exti_banks, .bank_nr = ARRAY_SIZE(stm32mp1_exti_banks), @@ -221,6 +263,13 @@ static const struct stm32_exti_drv_data stm32mp1_drv_data = { .irq_nr = ARRAY_SIZE(stm32mp1_desc_irq), }; +static const struct stm32_exti_drv_data stm32mp13_drv_data = { + .exti_banks = stm32mp1_exti_banks, + .bank_nr = ARRAY_SIZE(stm32mp1_exti_banks), + .desc_irqs = stm32mp13_desc_irq, + .irq_nr = ARRAY_SIZE(stm32mp13_desc_irq), +}; + static const struct stm32_desc_irq *stm32_exti_get_desc(const struct stm32_exti_drv_data *drv_data, irq_hw_number_t hwirq) @@ -922,6 +971,7 @@ static int stm32_exti_probe(struct 
platform_device *pdev) /* platform driver only for MP1 */ static const struct of_device_id stm32_exti_ids[] = { { .compatible = "st,stm32mp1-exti", .data = &stm32mp1_drv_data}, + { .compatible = "st,stm32mp13-exti", .data = &stm32mp13_drv_data}, {}, }; MODULE_DEVICE_TABLE(of, stm32_exti_ids); -- GitLab From 8e7c8ca6b988904d4c32c4053b325739738c8f36 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 20 Sep 2021 10:27:16 -0700 Subject: [PATCH 0333/1586] test_overflow: Regularize test reporting output Report test run summaries more regularly, so it's easier to understand the output: - Remove noisy "ok" reports for shift and allocator tests. - Reorganize per-type output to the end of each type's tests. - Replace redundant vmalloc tests with __vmalloc so that __GFP_NO_WARN can be used to keep the expected failure warnings out of dmesg, similar to commit 8e060c21ae2c ("lib/test_overflow.c: avoid tainting the kernel and fix wrap size") Resulting output: test_overflow: 18 u8 arithmetic tests finished test_overflow: 19 s8 arithmetic tests finished test_overflow: 17 u16 arithmetic tests finished test_overflow: 17 s16 arithmetic tests finished test_overflow: 17 u32 arithmetic tests finished test_overflow: 17 s32 arithmetic tests finished test_overflow: 17 u64 arithmetic tests finished test_overflow: 21 s64 arithmetic tests finished test_overflow: 113 shift tests finished test_overflow: 17 overflow size helper tests finished test_overflow: 11 allocation overflow tests finished test_overflow: all tests passed Acked-by: Rasmus Villemoes Link: https://lore.kernel.org/all/eb6d02ae-e2ed-e7bd-c700-8a6d004d84ce@rasmusvillemoes.dk/ Reviewed-by: Nick Desaulniers Link: https://lore.kernel.org/all/CAKwvOdnYYa+72VhtJ4ug=SJVFn7w+n7Th+hKYE87BRDt4hvqOg@mail.gmail.com/ Signed-off-by: Kees Cook --- lib/test_overflow.c | 54 +++++++++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/lib/test_overflow.c b/lib/test_overflow.c index 
7a4b6f6c5473c..cea37ae826153 100644 --- a/lib/test_overflow.c +++ b/lib/test_overflow.c @@ -252,10 +252,10 @@ static int __init test_ ## t ## _overflow(void) { \ int err = 0; \ unsigned i; \ \ - pr_info("%-3s: %zu arithmetic tests\n", #t, \ - ARRAY_SIZE(t ## _tests)); \ for (i = 0; i < ARRAY_SIZE(t ## _tests); ++i) \ err |= do_test_ ## t(&t ## _tests[i]); \ + pr_info("%zu %s arithmetic tests finished\n", \ + ARRAY_SIZE(t ## _tests), #t); \ return err; \ } @@ -291,6 +291,7 @@ static int __init test_overflow_calculation(void) static int __init test_overflow_shift(void) { int err = 0; + int count = 0; /* Args are: value, shift, type, expected result, overflow expected */ #define TEST_ONE_SHIFT(a, s, t, expect, of) ({ \ @@ -313,9 +314,7 @@ static int __init test_overflow_shift(void) pr_warn("got %llu\n", (u64)__d); \ __failed = 1; \ } \ - if (!__failed) \ - pr_info("ok: (%s)(%s << %s) == %s\n", #t, #a, #s, \ - of ? "overflow" : #expect); \ + count++; \ __failed; \ }) @@ -479,6 +478,10 @@ static int __init test_overflow_shift(void) err |= TEST_ONE_SHIFT(0, 31, s32, 0, false); err |= TEST_ONE_SHIFT(0, 63, s64, 0, false); + pr_info("%d shift tests finished\n", count); + +#undef TEST_ONE_SHIFT + return err; } @@ -530,7 +533,6 @@ static int __init test_ ## func (void *arg) \ free ## want_arg (free_func, arg, ptr); \ return 1; \ } \ - pr_info(#func " detected saturation\n"); \ return 0; \ } @@ -544,10 +546,7 @@ DEFINE_TEST_ALLOC(kmalloc, kfree, 0, 1, 0); DEFINE_TEST_ALLOC(kmalloc_node, kfree, 0, 1, 1); DEFINE_TEST_ALLOC(kzalloc, kfree, 0, 1, 0); DEFINE_TEST_ALLOC(kzalloc_node, kfree, 0, 1, 1); -DEFINE_TEST_ALLOC(vmalloc, vfree, 0, 0, 0); -DEFINE_TEST_ALLOC(vmalloc_node, vfree, 0, 0, 1); -DEFINE_TEST_ALLOC(vzalloc, vfree, 0, 0, 0); -DEFINE_TEST_ALLOC(vzalloc_node, vfree, 0, 0, 1); +DEFINE_TEST_ALLOC(__vmalloc, vfree, 0, 1, 0); DEFINE_TEST_ALLOC(kvmalloc, kvfree, 0, 1, 0); DEFINE_TEST_ALLOC(kvmalloc_node, kvfree, 0, 1, 1); DEFINE_TEST_ALLOC(kvzalloc, kvfree, 0, 1, 0); @@ 
-559,8 +558,14 @@ static int __init test_overflow_allocation(void) { const char device_name[] = "overflow-test"; struct device *dev; + int count = 0; int err = 0; +#define check_allocation_overflow(alloc) ({ \ + count++; \ + test_ ## alloc(dev); \ +}) + /* Create dummy device for devm_kmalloc()-family tests. */ dev = root_device_register(device_name); if (IS_ERR(dev)) { @@ -568,23 +573,24 @@ static int __init test_overflow_allocation(void) return 1; } - err |= test_kmalloc(NULL); - err |= test_kmalloc_node(NULL); - err |= test_kzalloc(NULL); - err |= test_kzalloc_node(NULL); - err |= test_kvmalloc(NULL); - err |= test_kvmalloc_node(NULL); - err |= test_kvzalloc(NULL); - err |= test_kvzalloc_node(NULL); - err |= test_vmalloc(NULL); - err |= test_vmalloc_node(NULL); - err |= test_vzalloc(NULL); - err |= test_vzalloc_node(NULL); - err |= test_devm_kmalloc(dev); - err |= test_devm_kzalloc(dev); + err |= check_allocation_overflow(kmalloc); + err |= check_allocation_overflow(kmalloc_node); + err |= check_allocation_overflow(kzalloc); + err |= check_allocation_overflow(kzalloc_node); + err |= check_allocation_overflow(__vmalloc); + err |= check_allocation_overflow(kvmalloc); + err |= check_allocation_overflow(kvmalloc_node); + err |= check_allocation_overflow(kvzalloc); + err |= check_allocation_overflow(kvzalloc_node); + err |= check_allocation_overflow(devm_kmalloc); + err |= check_allocation_overflow(devm_kzalloc); device_unregister(dev); + pr_info("%d allocation overflow tests finished\n", count); + +#undef check_allocation_overflow + return err; } -- GitLab From c9edbe1eb98248c290d93aa2ffdc30cab5e2e62c Mon Sep 17 00:00:00 2001 From: Akira Yokosawa Date: Wed, 29 Dec 2021 20:44:19 +0900 Subject: [PATCH 0334/1586] docs: sphinx/kfigure.py: Use rsvg-convert(1) for DOT -> PDF conversion On openSUSE, dot(1) command does not support direct PDF output. On other distros, generated PDF images have unnecessarily wide margins, especially for small graphs. 
By using dot(1) for DOT -> SVG, then rsvg-convert(1) for SVG -> PDF, more optimal PDF images can be obtained, with the bonus of improved portability across various distros. Add rules in kfigure.py so that the above mentioned route is taken when rsvg-convert(1) is available. Note that rsvg-convert(1) is recommended by sphinx_pre_install. So it is most likely that existing systems for building pdfdocs have rsvg-convert(1) installed. Note: SVG features supported by rsvg-convert(1) vary depending on its version and distro config. For example, the one found on Ubuntu Bionic (version 2.40.20) does a poor job of rendering some of the SVG files drawn by Inkscape. SVG files generated by dot(1) are converted nicely even with such old versions of rsvg-convert. So this change does not affect the quality of such figures in any way. Signed-off-by: Akira Yokosawa Cc: Jonathan Corbet Cc: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/15b56dd3-081a-2469-c3a4-dfc1ca4c6c2d@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/sphinx/kfigure.py | 46 +++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/Documentation/sphinx/kfigure.py b/Documentation/sphinx/kfigure.py index 3c78828330be0..955e3ec5de5a6 100644 --- a/Documentation/sphinx/kfigure.py +++ b/Documentation/sphinx/kfigure.py @@ -31,6 +31,8 @@ u""" * ``dot(1)``: Graphviz (https://www.graphviz.org). If Graphviz is not available, the DOT language is inserted as literal-block. + For conversion to PDF, ``rsvg-convert(1)`` of librsvg + (https://gitlab.gnome.org/GNOME/librsvg) is used when available. * SVG to PDF: To generate PDF, you need at least one of this tools: @@ -113,6 +115,9 @@ dot_cmd = None # ImageMagick' convert(1) support convert_cmd = None +# librsvg's rsvg-convert(1) support +rsvg_convert_cmd = None + def setup(app): # check toolchain first @@ -160,11 +165,12 @@ def setupTools(app): This function is called once, when the builder is initiated. 
""" - global dot_cmd, convert_cmd # pylint: disable=W0603 + global dot_cmd, convert_cmd, rsvg_convert_cmd # pylint: disable=W0603 kernellog.verbose(app, "kfigure: check installed tools ...") dot_cmd = which('dot') convert_cmd = which('convert') + rsvg_convert_cmd = which('rsvg-convert') if dot_cmd: kernellog.verbose(app, "use dot(1) from: " + dot_cmd) @@ -177,6 +183,11 @@ def setupTools(app): kernellog.warn(app, "convert(1) not found, for SVG to PDF conversion install " "ImageMagick (https://www.imagemagick.org)") + if rsvg_convert_cmd: + kernellog.verbose(app, "use rsvg-convert(1) from: " + rsvg_convert_cmd) + else: + kernellog.verbose(app, "rsvg-convert(1) not found, " + "falling back to raster image conversion") # integrate conversion tools @@ -266,7 +277,13 @@ def convert_image(img_node, translator, src_fname=None): if in_ext == '.dot': kernellog.verbose(app, 'convert DOT to: {out}/' + _name) - ok = dot2format(app, src_fname, dst_fname) + if translator.builder.format == 'latex': + svg_fname = path.join(translator.builder.outdir, fname + '.svg') + ok1 = dot2format(app, src_fname, svg_fname) + ok2 = svg2pdf_by_rsvg(app, svg_fname, dst_fname) + ok = ok1 and ok2 + else: + ok = dot2format(app, src_fname, dst_fname) elif in_ext == '.svg': kernellog.verbose(app, 'convert SVG to: {out}/' + _name) @@ -319,6 +336,31 @@ def svg2pdf(app, svg_fname, pdf_fname): kernellog.warn(app, "Error #%d when calling: %s" % (exit_code, " ".join(cmd))) return bool(exit_code == 0) +def svg2pdf_by_rsvg(app, svg_fname, pdf_fname): + """Convert SVG to PDF with ``rsvg-convert(1)`` command. + + * ``svg_fname`` pathname of input SVG file, including extension ``.svg`` + * ``pdf_fname`` pathname of output PDF file, including extension ``.pdf`` + + Input SVG file should be the one generated by ``dot2format()``. + SVG -> PDF conversion is done by ``rsvg-convert(1)``. + + If ``rsvg-convert(1)`` is unavailable, fall back to ``svg2pdf()``. 
+ + """ + + if rsvg_convert_cmd is None: + ok = svg2pdf(app, svg_fname, pdf_fname) + else: + cmd = [rsvg_convert_cmd, '--format=pdf', '-o', pdf_fname, svg_fname] + # use stdout and stderr from parent + exit_code = subprocess.call(cmd) + if exit_code != 0: + kernellog.warn(app, "Error #%d when calling: %s" % (exit_code, " ".join(cmd))) + ok = bool(exit_code == 0) + + return ok + # image handling # --------------------- -- GitLab From ecf5fb58cdcd93b9cf555b95da4ef73b1297de4c Mon Sep 17 00:00:00 2001 From: Akira Yokosawa Date: Wed, 29 Dec 2021 20:45:29 +0900 Subject: [PATCH 0335/1586] docs: sphinx/kfigure.py: Add check of 'dot -Tpdf' To prevent any regression on existing build systems, limit the fallback of converting DOT -> raster PDF only when both of the following conditions are met. o dot(1) doesn't support -Tpdf o rsvg-convert(1) is not found While we are here, add kernellog.verbose messages related to rsvg-convert, 'dot -Tpdf', and 'dot -Tsvg' commands. Suggested-by: Mauro Carvalho Chehab Signed-off-by: Akira Yokosawa Cc: Jonathan Corbet Link: https://lore.kernel.org/r/e76f61e1-7366-ba00-b119-8ea6a2499861@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/sphinx/kfigure.py | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/Documentation/sphinx/kfigure.py b/Documentation/sphinx/kfigure.py index 955e3ec5de5a6..77b0d15dba317 100644 --- a/Documentation/sphinx/kfigure.py +++ b/Documentation/sphinx/kfigure.py @@ -51,6 +51,7 @@ import os from os import path import subprocess from hashlib import sha1 +import re from docutils import nodes from docutils.statemachine import ViewList from docutils.parsers.rst import directives @@ -111,6 +112,8 @@ def pass_handle(self, node): # pylint: disable=W0613 # Graphviz's dot(1) support dot_cmd = None +# dot(1) -Tpdf should be used +dot_Tpdf = False # ImageMagick' convert(1) support convert_cmd = None @@ -165,7 +168,7 @@ def setupTools(app): This function is called once, when the 
builder is initiated. """ - global dot_cmd, convert_cmd, rsvg_convert_cmd # pylint: disable=W0603 + global dot_cmd, dot_Tpdf, convert_cmd, rsvg_convert_cmd # pylint: disable=W0603 kernellog.verbose(app, "kfigure: check installed tools ...") dot_cmd = which('dot') @@ -174,6 +177,16 @@ def setupTools(app): if dot_cmd: kernellog.verbose(app, "use dot(1) from: " + dot_cmd) + + try: + dot_Thelp_list = subprocess.check_output([dot_cmd, '-Thelp'], + stderr=subprocess.STDOUT) + except subprocess.CalledProcessError as err: + dot_Thelp_list = err.output + pass + + dot_Tpdf_ptn = b'pdf' + dot_Tpdf = re.search(dot_Tpdf_ptn, dot_Thelp_list) else: kernellog.warn(app, "dot(1) not found, for better output quality install " "graphviz from https://www.graphviz.org") @@ -185,9 +198,17 @@ def setupTools(app): "ImageMagick (https://www.imagemagick.org)") if rsvg_convert_cmd: kernellog.verbose(app, "use rsvg-convert(1) from: " + rsvg_convert_cmd) + kernellog.verbose(app, "use 'dot -Tsvg' and rsvg-convert(1) for DOT -> PDF conversion") + dot_Tpdf = False else: - kernellog.verbose(app, "rsvg-convert(1) not found, " - "falling back to raster image conversion") + kernellog.verbose(app, + "rsvg-convert(1) not found.\n" + " SVG -> PDF conversion by convert() can be poor quality.\n" + " Install librsvg (https://gitlab.gnome.org/GNOME/librsvg)") + if dot_Tpdf: + kernellog.verbose(app, "use 'dot -Tpdf' for DOT -> PDF conversion") + else: + kernellog.verbose(app, "use 'dot -Tsvg' and convert(1) for DOT -> PDF conversion") # integrate conversion tools @@ -277,11 +298,12 @@ def convert_image(img_node, translator, src_fname=None): if in_ext == '.dot': kernellog.verbose(app, 'convert DOT to: {out}/' + _name) - if translator.builder.format == 'latex': + if translator.builder.format == 'latex' and not dot_Tpdf: svg_fname = path.join(translator.builder.outdir, fname + '.svg') ok1 = dot2format(app, src_fname, svg_fname) ok2 = svg2pdf_by_rsvg(app, svg_fname, dst_fname) ok = ok1 and ok2 + else: ok = 
dot2format(app, src_fname, dst_fname) -- GitLab From 8ccd05697a9d2f837f77a858e81ba13cdb50adac Mon Sep 17 00:00:00 2001 From: Akira Yokosawa Date: Wed, 29 Dec 2021 20:46:58 +0900 Subject: [PATCH 0336/1586] docs: sphinx/kfigure.py: Use inkscape(1) for SVG -> PDF conversion Using convert(1) of ImageMagick for SVG -> PDF conversion results in PDFs containing raster (bitmap) images which sometimes look blurry. Ideally speaking, SVG to PDF conversion should retain vector graphics in SVG. rsvg-convert(1) can do such conversions with regard to SVG files generated by dot(1). Unfortunately, rsvg-convert(1) does not cover some of SVG features specific to Inkscape. inkscape(1) of Inkscape naturally covers such SVG features. So add a route in svg2pdf() so that inkscape(1) is used when it is available. Note: After this change, if you have Inkscape installed, neither ImageMagick nor librsvg is required. Signed-off-by: Akira Yokosawa Cc: Jonathan Corbet Cc: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/3eea2a8d-c52d-ee07-cf7b-83784c6f6e4b@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/sphinx/kfigure.py | 68 +++++++++++++++++++++++---------- 1 file changed, 48 insertions(+), 20 deletions(-) diff --git a/Documentation/sphinx/kfigure.py b/Documentation/sphinx/kfigure.py index 77b0d15dba317..e616e49669eb1 100644 --- a/Documentation/sphinx/kfigure.py +++ b/Documentation/sphinx/kfigure.py @@ -37,6 +37,7 @@ u""" * SVG to PDF: To generate PDF, you need at least one of this tools: - ``convert(1)``: ImageMagick (https://www.imagemagick.org) + - ``inkscape(1)``: Inkscape (https://inkscape.org/) List of customizations: @@ -121,6 +122,11 @@ convert_cmd = None # librsvg's rsvg-convert(1) support rsvg_convert_cmd = None +# Inkscape's inkscape(1) support +inkscape_cmd = None +# Inkscape prior to 1.0 uses different command options +inkscape_ver_one = False + def setup(app): # check toolchain first @@ -169,11 +175,13 @@ def setupTools(app): This function is called once, when the
builder is initiated. """ global dot_cmd, dot_Tpdf, convert_cmd, rsvg_convert_cmd # pylint: disable=W0603 + global inkscape_cmd, inkscape_ver_one # pylint: disable=W0603 kernellog.verbose(app, "kfigure: check installed tools ...") dot_cmd = which('dot') convert_cmd = which('convert') rsvg_convert_cmd = which('rsvg-convert') + inkscape_cmd = which('inkscape') if dot_cmd: kernellog.verbose(app, "use dot(1) from: " + dot_cmd) @@ -190,25 +198,37 @@ def setupTools(app): else: kernellog.warn(app, "dot(1) not found, for better output quality install " "graphviz from https://www.graphviz.org") - if convert_cmd: - kernellog.verbose(app, "use convert(1) from: " + convert_cmd) - else: - kernellog.warn(app, - "convert(1) not found, for SVG to PDF conversion install " - "ImageMagick (https://www.imagemagick.org)") - if rsvg_convert_cmd: - kernellog.verbose(app, "use rsvg-convert(1) from: " + rsvg_convert_cmd) - kernellog.verbose(app, "use 'dot -Tsvg' and rsvg-convert(1) for DOT -> PDF conversion") + if inkscape_cmd: + kernellog.verbose(app, "use inkscape(1) from: " + inkscape_cmd) + inkscape_ver = subprocess.check_output([inkscape_cmd, '--version']) + ver_one_ptn = b'Inkscape 1' + inkscape_ver_one = re.search(ver_one_ptn, inkscape_ver) + convert_cmd = None + rsvg_convert_cmd = None dot_Tpdf = False + else: - kernellog.verbose(app, - "rsvg-convert(1) not found.\n" - " SVG -> PDF conversion by convert() can be poor quality.\n" - " Install librsvg (https://gitlab.gnome.org/GNOME/librsvg)") - if dot_Tpdf: - kernellog.verbose(app, "use 'dot -Tpdf' for DOT -> PDF conversion") + if convert_cmd: + kernellog.verbose(app, "use convert(1) from: " + convert_cmd) + else: + kernellog.warn(app, + "Neither inkscape(1) nor convert(1) found.\n" + "For SVG to PDF conversion, " + "install either Inkscape (https://inkscape.org/) (preferred) or\n" + "ImageMagick (https://www.imagemagick.org)") + + if rsvg_convert_cmd: + kernellog.verbose(app, "use rsvg-convert(1) from: " + rsvg_convert_cmd) + 
kernellog.verbose(app, "use 'dot -Tsvg' and rsvg-convert(1) for DOT -> PDF conversion") + dot_Tpdf = False else: - kernellog.verbose(app, "use 'dot -Tsvg' and convert(1) for DOT -> PDF conversion") + kernellog.verbose(app, + "rsvg-convert(1) not found.\n" + " SVG rendering of convert(1) is done by ImageMagick-native renderer.") + if dot_Tpdf: + kernellog.verbose(app, "use 'dot -Tpdf' for DOT -> PDF conversion") + else: + kernellog.verbose(app, "use 'dot -Tsvg' and convert(1) for DOT -> PDF conversion") # integrate conversion tools @@ -274,7 +294,7 @@ def convert_image(img_node, translator, src_fname=None): elif in_ext == '.svg': if translator.builder.format == 'latex': - if convert_cmd is None: + if not inkscape_cmd and convert_cmd is None: kernellog.verbose(app, "no SVG to PDF conversion available / include SVG raw.") img_node.replace_self(file2literal(src_fname)) @@ -342,16 +362,24 @@ def dot2format(app, dot_fname, out_fname): return bool(exit_code == 0) def svg2pdf(app, svg_fname, pdf_fname): - """Converts SVG to PDF with ``convert(1)`` command. + """Converts SVG to PDF with ``inkscape(1)`` or ``convert(1)`` command. - Uses ``convert(1)`` from ImageMagick (https://www.imagemagick.org) for - conversion. Returns ``True`` on success and ``False`` if an error occurred. + Uses ``inkscape(1)`` from Inkscape (https://inkscape.org/) or ``convert(1)`` + from ImageMagick (https://www.imagemagick.org) for conversion. + Returns ``True`` on success and ``False`` if an error occurred. 
* ``svg_fname`` pathname of the input SVG file with extension (``.svg``) * ``pdf_name`` pathname of the output PDF file with extension (``.pdf``) """ cmd = [convert_cmd, svg_fname, pdf_fname] + + if inkscape_cmd: + if inkscape_ver_one: + cmd = [inkscape_cmd, '-o', pdf_fname, svg_fname] + else: + cmd = [inkscape_cmd, '-z', '--export-pdf=%s' % pdf_fname, svg_fname] + # use stdout and stderr from parent exit_code = subprocess.call(cmd) if exit_code != 0: -- GitLab From f30a7ac8c6100e88dc416b675425541a337a46c8 Mon Sep 17 00:00:00 2001 From: Akira Yokosawa Date: Wed, 29 Dec 2021 20:47:56 +0900 Subject: [PATCH 0337/1586] docs: sphinx/kfigure.py: Delegate inkscape msg to kernellog.verbose Depending on its version, distro config, and system-setup type, inkscape(1) emits various warning messages which are harmless in command-line uses. List of such warning messages (incomplete, long ones wrapped): - Gtk-Message: hh:mm:ss.nnn: Failed to load module "canberra-gtk-module" - Unable to init server: Could not connect: Connection refused - Failed to get connection - ** (inkscape:xxx): CRITICAL **: hh:mm:ss.nnn: dbus_g_proxy_new_for_name: assertion 'connection != NULL' failed - ** (inkscape:xxx): CRITICAL **: hh:mm:ss.nnn: dbus_g_proxy_call: assertion 'DBUS_IS_G_PROXY (proxy)' failed - ** (inkscape:xxx): CRITICAL **: hh:mm:ss.nnn: dbus_g_connection_register_g_object: assertion 'connection != NULL' failed - ** (inkscape:xxx): WARNING **: hh:mm:ss.nnn: Fonts dir '/usr/share/inkscape/fonts' does not exist and will be ignored. To avoid unnecessary anxiety, capture the message and output it via kernellog.verbose or kernellog.warn depending on the exit code. 
Signed-off-by: Akira Yokosawa Cc: Jonathan Corbet Cc: Mauro Carvalho Chehab Cc: Randy Dunlap Link: https://lore.kernel.org/r/e26a7b53-9155-8394-4a31-6006379b65a5@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/sphinx/kfigure.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/Documentation/sphinx/kfigure.py b/Documentation/sphinx/kfigure.py index e616e49669eb1..24d2b2addcce3 100644 --- a/Documentation/sphinx/kfigure.py +++ b/Documentation/sphinx/kfigure.py @@ -200,7 +200,8 @@ def setupTools(app): "graphviz from https://www.graphviz.org") if inkscape_cmd: kernellog.verbose(app, "use inkscape(1) from: " + inkscape_cmd) - inkscape_ver = subprocess.check_output([inkscape_cmd, '--version']) + inkscape_ver = subprocess.check_output([inkscape_cmd, '--version'], + stderr=subprocess.DEVNULL) ver_one_ptn = b'Inkscape 1' inkscape_ver_one = re.search(ver_one_ptn, inkscape_ver) convert_cmd = None @@ -373,17 +374,32 @@ def svg2pdf(app, svg_fname, pdf_fname): """ cmd = [convert_cmd, svg_fname, pdf_fname] + cmd_name = 'convert(1)' if inkscape_cmd: + cmd_name = 'inkscape(1)' if inkscape_ver_one: cmd = [inkscape_cmd, '-o', pdf_fname, svg_fname] else: cmd = [inkscape_cmd, '-z', '--export-pdf=%s' % pdf_fname, svg_fname] - # use stdout and stderr from parent - exit_code = subprocess.call(cmd) + try: + warning_msg = subprocess.check_output(cmd, stderr=subprocess.STDOUT) + exit_code = 0 + except subprocess.CalledProcessError as err: + warning_msg = err.output + exit_code = err.returncode + pass + if exit_code != 0: kernellog.warn(app, "Error #%d when calling: %s" % (exit_code, " ".join(cmd))) + if warning_msg: + kernellog.warn(app, "Warning msg from %s: %s" + % (cmd_name, str(warning_msg, 'utf-8'))) + elif warning_msg: + kernellog.verbose(app, "Warning msg from %s (likely harmless):\n%s" + % (cmd_name, str(warning_msg, 'utf-8'))) + return bool(exit_code == 0) def svg2pdf_by_rsvg(app, svg_fname, pdf_fname): -- GitLab From 
f7e53e2255808ca3abcc8f38d18ad0823425e771 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 1 Feb 2022 12:03:08 +0000 Subject: [PATCH 0338/1586] pinctrl: npcm: Fix broken references to chip->parent_device The npcm driver has a bunch of references to the irq_chip parent_device field, but never sets it. Fix it by fishing that reference from somewhere else, but it is obvious that these debug statements were never used. Also remove an unused field in a local data structure. Signed-off-by: Marc Zyngier Acked-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20220201120310.878267-11-maz@kernel.org --- drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c | 25 +++++++++++------------ 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c b/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c index 4d81908d6725d..ba536fd4d6740 100644 --- a/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c +++ b/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c @@ -78,7 +78,6 @@ struct npcm7xx_gpio { struct gpio_chip gc; int irqbase; int irq; - void *priv; struct irq_chip irq_chip; u32 pinctrl_id; int (*direction_input)(struct gpio_chip *chip, unsigned offset); @@ -226,7 +225,7 @@ static void npcmgpio_irq_handler(struct irq_desc *desc) chained_irq_enter(chip, desc); sts = ioread32(bank->base + NPCM7XX_GP_N_EVST); en = ioread32(bank->base + NPCM7XX_GP_N_EVEN); - dev_dbg(chip->parent_device, "==> got irq sts %.8x %.8x\n", sts, + dev_dbg(bank->gc.parent, "==> got irq sts %.8x %.8x\n", sts, en); sts &= en; @@ -241,33 +240,33 @@ static int npcmgpio_set_irq_type(struct irq_data *d, unsigned int type) gpiochip_get_data(irq_data_get_irq_chip_data(d)); unsigned int gpio = BIT(d->hwirq); - dev_dbg(d->chip->parent_device, "setirqtype: %u.%u = %u\n", gpio, + dev_dbg(bank->gc.parent, "setirqtype: %u.%u = %u\n", gpio, d->irq, type); switch (type) { case IRQ_TYPE_EDGE_RISING: - dev_dbg(d->chip->parent_device, "edge.rising\n"); + dev_dbg(bank->gc.parent, "edge.rising\n"); 
npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_EVBE, gpio); npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio); break; case IRQ_TYPE_EDGE_FALLING: - dev_dbg(d->chip->parent_device, "edge.falling\n"); + dev_dbg(bank->gc.parent, "edge.falling\n"); npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_EVBE, gpio); npcm_gpio_set(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio); break; case IRQ_TYPE_EDGE_BOTH: - dev_dbg(d->chip->parent_device, "edge.both\n"); + dev_dbg(bank->gc.parent, "edge.both\n"); npcm_gpio_set(&bank->gc, bank->base + NPCM7XX_GP_N_EVBE, gpio); break; case IRQ_TYPE_LEVEL_LOW: - dev_dbg(d->chip->parent_device, "level.low\n"); + dev_dbg(bank->gc.parent, "level.low\n"); npcm_gpio_set(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio); break; case IRQ_TYPE_LEVEL_HIGH: - dev_dbg(d->chip->parent_device, "level.high\n"); + dev_dbg(bank->gc.parent, "level.high\n"); npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio); break; default: - dev_dbg(d->chip->parent_device, "invalid irq type\n"); + dev_dbg(bank->gc.parent, "invalid irq type\n"); return -EINVAL; } @@ -289,7 +288,7 @@ static void npcmgpio_irq_ack(struct irq_data *d) gpiochip_get_data(irq_data_get_irq_chip_data(d)); unsigned int gpio = d->hwirq; - dev_dbg(d->chip->parent_device, "irq_ack: %u.%u\n", gpio, d->irq); + dev_dbg(bank->gc.parent, "irq_ack: %u.%u\n", gpio, d->irq); iowrite32(BIT(gpio), bank->base + NPCM7XX_GP_N_EVST); } @@ -301,7 +300,7 @@ static void npcmgpio_irq_mask(struct irq_data *d) unsigned int gpio = d->hwirq; /* Clear events */ - dev_dbg(d->chip->parent_device, "irq_mask: %u.%u\n", gpio, d->irq); + dev_dbg(bank->gc.parent, "irq_mask: %u.%u\n", gpio, d->irq); iowrite32(BIT(gpio), bank->base + NPCM7XX_GP_N_EVENC); } @@ -313,7 +312,7 @@ static void npcmgpio_irq_unmask(struct irq_data *d) unsigned int gpio = d->hwirq; /* Enable events */ - dev_dbg(d->chip->parent_device, "irq_unmask: %u.%u\n", gpio, d->irq); + dev_dbg(bank->gc.parent, "irq_unmask: %u.%u\n", gpio, 
d->irq); iowrite32(BIT(gpio), bank->base + NPCM7XX_GP_N_EVENS); } @@ -323,7 +322,7 @@ static unsigned int npcmgpio_irq_startup(struct irq_data *d) unsigned int gpio = d->hwirq; /* active-high, input, clear interrupt, enable interrupt */ - dev_dbg(d->chip->parent_device, "startup: %u.%u\n", gpio, d->irq); + dev_dbg(gc->parent, "startup: %u.%u\n", gpio, d->irq); npcmgpio_direction_input(gc, gpio); npcmgpio_irq_ack(d); npcmgpio_irq_unmask(d); -- GitLab From 0d872ed9e2148a8ba29de5a71c352fa54abf8e5e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 1 Feb 2022 12:03:09 +0000 Subject: [PATCH 0339/1586] pinctrl: starfive: Move PM device over to irq domain Move the reference to the device over to the irq domain. Signed-off-by: Marc Zyngier Reviewed-by: Emil Renner Berthing Acked-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20220201120310.878267-12-maz@kernel.org --- drivers/pinctrl/pinctrl-starfive.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-starfive.c b/drivers/pinctrl/pinctrl-starfive.c index 0b912152a405a..5be9866c2b3c0 100644 --- a/drivers/pinctrl/pinctrl-starfive.c +++ b/drivers/pinctrl/pinctrl-starfive.c @@ -1307,7 +1307,6 @@ static int starfive_probe(struct platform_device *pdev) sfp->gc.base = -1; sfp->gc.ngpio = NR_GPIOS; - starfive_irq_chip.parent_device = dev; starfive_irq_chip.name = sfp->gc.label; sfp->gc.irq.chip = &starfive_irq_chip; @@ -1330,6 +1329,8 @@ static int starfive_probe(struct platform_device *pdev) if (ret) return dev_err_probe(dev, ret, "could not register gpiochip\n"); + irq_domain_set_pm_device(sfp->gc.irq.domain, dev); + out_pinctrl_enable: return pinctrl_enable(sfp->pctl); } -- GitLab From beb0622138cd2848dec06b0651a988c39d099574 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 1 Feb 2022 12:03:10 +0000 Subject: [PATCH 0340/1586] genirq: Kill irq_chip::parent_device Now that no one is using irq_chip::parent_device in the tree, get rid of it.
Signed-off-by: Marc Zyngier Acked-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20220201120310.878267-13-maz@kernel.org --- include/linux/irq.h | 2 -- kernel/irq/chip.c | 3 --- 2 files changed, 5 deletions(-) diff --git a/include/linux/irq.h b/include/linux/irq.h index 848e1e12c5c63..2cb2e2ac2703c 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -456,7 +456,6 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) /** * struct irq_chip - hardware interrupt chip descriptor * - * @parent_device: pointer to parent device for irqchip * @name: name for /proc/interrupts * @irq_startup: start up the interrupt (defaults to ->enable if NULL) * @irq_shutdown: shut down the interrupt (defaults to ->disable if NULL) @@ -503,7 +502,6 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) * @flags: chip specific flags */ struct irq_chip { - struct device *parent_device; const char *name; unsigned int (*irq_startup)(struct irq_data *data); void (*irq_shutdown)(struct irq_data *data); diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index a2a12cdbe8725..24b6f2b40e5e9 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -1560,9 +1560,6 @@ int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) static struct device *irq_get_parent_device(struct irq_data *data) { - if (data->chip->parent_device) - return data->chip->parent_device; - if (data->domain) return data->domain->dev; -- GitLab From c95aa2bab974394809edea28690f6504a15791b6 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 8 Feb 2022 16:15:46 -0800 Subject: [PATCH 0341/1586] thermal: intel: hfi: INTEL_HFI_THERMAL depends on NET THERMAL_NETLINK depends on NET and since 'select' does not follow any dependency chain, INTEL_HFI_THERMAL also should depend on NET. 
Fix one Kconfig warning and 48 subsequent build errors: WARNING: unmet direct dependencies detected for THERMAL_NETLINK Depends on [n]: THERMAL [=y] && NET [=n] Selected by [y]: - INTEL_HFI_THERMAL [=y] && THERMAL [=y] && (X86 [=y] || X86_INTEL_QUARK [=n] || COMPILE_TEST [=y]) && CPU_SUP_INTEL [=y] && X86_THERMAL_VECTOR [=y] Fixes: bd30cdfd9bd7 ("thermal: intel: hfi: Notify user space for HFI events") Signed-off-by: Randy Dunlap Reviewed-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/thermal/intel/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/thermal/intel/Kconfig b/drivers/thermal/intel/Kconfig index 6cf3fe36a4ae8..f0c8456792509 100644 --- a/drivers/thermal/intel/Kconfig +++ b/drivers/thermal/intel/Kconfig @@ -102,6 +102,7 @@ config INTEL_MENLOW config INTEL_HFI_THERMAL bool "Intel Hardware Feedback Interface" + depends on NET depends on CPU_SUP_INTEL depends on X86_THERMAL_VECTOR select THERMAL_NETLINK -- GitLab From c9c28ed0ab611b6ee3bfab88eba334e272642433 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20M=C3=BCller?= Date: Tue, 1 Feb 2022 09:40:58 +0100 Subject: [PATCH 0342/1586] crypto: hmac - add fips_skip support By adding the support for the flag fips_skip, hash / HMAC test vectors may be marked to be not applicable in FIPS mode. Such vectors are silently skipped in FIPS mode. 
Signed-off-by: Stephan Mueller Signed-off-by: Herbert Xu --- crypto/testmgr.c | 3 +++ crypto/testmgr.h | 2 ++ 2 files changed, 5 insertions(+) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 3a5a3e5cb77bf..0c2efde2f6c65 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -1851,6 +1851,9 @@ static int __alg_test_hash(const struct hash_testvec *vecs, } for (i = 0; i < num_vecs; i++) { + if (fips_enabled && vecs[i].fips_skip) + continue; + err = test_hash_vec(&vecs[i], i, req, desc, tsgl, hashstate); if (err) goto out; diff --git a/crypto/testmgr.h b/crypto/testmgr.h index a253d66ba1c1a..17b37525f2890 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -33,6 +33,7 @@ * @ksize: Length of @key in bytes (0 if no key) * @setkey_error: Expected error from setkey() * @digest_error: Expected error from digest() + * @fips_skip: Skip the test vector in FIPS mode */ struct hash_testvec { const char *key; @@ -42,6 +43,7 @@ struct hash_testvec { unsigned short ksize; int setkey_error; int digest_error; + bool fips_skip; }; /* -- GitLab From 37f36e5717869a69775ecb23baedf0f06ea940b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20M=C3=BCller?= Date: Tue, 1 Feb 2022 09:41:32 +0100 Subject: [PATCH 0343/1586] crypto: hmac - disallow keys < 112 bits in FIPS mode FIPS 140 requires a minimum security strength of 112 bits. This implies that the HMAC key must not be smaller than 112 in FIPS mode. This restriction implies that the test vectors for HMAC that have a key that is smaller than 112 bits must be disabled when FIPS support is compiled. 
Signed-off-by: Stephan Mueller Signed-off-by: Herbert Xu --- crypto/hmac.c | 4 ++++ crypto/testmgr.h | 9 +++++++++ 2 files changed, 13 insertions(+) diff --git a/crypto/hmac.c b/crypto/hmac.c index 25856aa7ccbf9..3610ff0b67392 100644 --- a/crypto/hmac.c +++ b/crypto/hmac.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -51,6 +52,9 @@ static int hmac_setkey(struct crypto_shash *parent, SHASH_DESC_ON_STACK(shash, hash); unsigned int i; + if (fips_enabled && (keylen < 112 / 8)) + return -EINVAL; + shash->tfm = hash; if (keylen > bs) { diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 17b37525f2890..85ccf811f5e79 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -5715,6 +5715,7 @@ static const struct hash_testvec hmac_sha1_tv_template[] = { .psize = 28, .digest = "\xef\xfc\xdf\x6a\xe5\xeb\x2f\xa2\xd2\x74" "\x16\xd5\xf1\x84\xdf\x9c\x25\x9a\x7c\x79", + .fips_skip = 1, }, { .key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa", .ksize = 20, @@ -5804,6 +5805,7 @@ static const struct hash_testvec hmac_sha224_tv_template[] = { "\x45\x69\x0f\x3a\x7e\x9e\x6d\x0f" "\x8b\xbe\xa2\xa3\x9e\x61\x48\x00" "\x8f\xd0\x5e\x44", + .fips_skip = 1, }, { .key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" @@ -5947,6 +5949,7 @@ static const struct hash_testvec hmac_sha256_tv_template[] = { "\x6a\x04\x24\x26\x08\x95\x75\xc7" "\x5a\x00\x3f\x08\x9d\x27\x39\x83" "\x9d\xec\x58\xb9\x64\xec\x38\x43", + .fips_skip = 1, }, { .key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" @@ -6445,6 +6448,7 @@ static const struct hash_testvec hmac_sha384_tv_template[] = { "\xe4\x2e\xc3\x73\x63\x22\x44\x5e" "\x8e\x22\x40\xca\x5e\x69\xe2\xc7" "\x8b\x32\x39\xec\xfa\xb2\x16\x49", + .fips_skip = 1, }, { .key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" @@ -6545,6 +6549,7 @@ static const struct hash_testvec 
hmac_sha512_tv_template[] = { "\x6d\x03\x4f\x65\xf8\xf0\xe6\xfd" "\xca\xea\xb1\xa3\x4d\x4a\x6b\x4b" "\x63\x6e\x07\x0a\x38\xbc\xe7\x37", + .fips_skip = 1, }, { .key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" @@ -6640,6 +6645,7 @@ static const struct hash_testvec hmac_sha3_224_tv_template[] = { "\x1b\x79\x86\x34\xad\x38\x68\x11" "\xc2\xcf\xc8\x5b\xfa\xf5\xd5\x2b" "\xba\xce\x5e\x66", + .fips_skip = 1, }, { .key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" @@ -6727,6 +6733,7 @@ static const struct hash_testvec hmac_sha3_256_tv_template[] = { "\x35\x96\xbb\xb0\xda\x73\xb8\x87" "\xc9\x17\x1f\x93\x09\x5b\x29\x4a" "\xe8\x57\xfb\xe2\x64\x5e\x1b\xa5", + .fips_skip = 1, }, { .key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" @@ -6818,6 +6825,7 @@ static const struct hash_testvec hmac_sha3_384_tv_template[] = { "\x3c\xa1\x35\x08\xa9\x32\x43\xce" "\x48\xc0\x45\xdc\x00\x7f\x26\xa2" "\x1b\x3f\x5e\x0e\x9d\xf4\xc2\x0a", + .fips_skip = 1, }, { .key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" @@ -6917,6 +6925,7 @@ static const struct hash_testvec hmac_sha3_512_tv_template[] = { "\xee\x7a\x0c\x31\xd0\x22\xa9\x5e" "\x1f\xc9\x2b\xa9\xd7\x7d\xf8\x83" "\x96\x02\x75\xbe\xb4\xe6\x20\x24", + .fips_skip = 1, }, { .key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" -- GitLab From e8bf24bd439da1ee7f37c2b03f44c6ad37c0c8c0 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Fri, 4 Feb 2022 15:35:22 +0200 Subject: [PATCH 0344/1586] crypto: atmel-tdes - Add support for the TDES IP available on sama7g5 SoC Add support for the TDES IP found on sama7g5. 
Signed-off-by: Tudor Ambarus Signed-off-by: Herbert Xu --- drivers/crypto/atmel-tdes.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c index e30786ec9f2d4..9fd7b8e439d2f 100644 --- a/drivers/crypto/atmel-tdes.c +++ b/drivers/crypto/atmel-tdes.c @@ -1130,6 +1130,7 @@ static void atmel_tdes_get_cap(struct atmel_tdes_dev *dd) /* keep only major version number */ switch (dd->hw_version & 0xf00) { + case 0x800: case 0x700: dd->caps.has_dma = 1; dd->caps.has_cfb_3keys = 1; -- GitLab From 297565aa22cfa80ab0f88c3569693aea0b6afb6d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 5 Feb 2022 16:23:45 +0100 Subject: [PATCH 0345/1586] lib/xor: make xor prototypes more friendly to compiler vectorization Modern compilers are perfectly capable of extracting parallelism from the XOR routines, provided that the prototypes reflect the nature of the input accurately, in particular, the fact that the input vectors are expected not to overlap. This is not documented explicitly, but is implied by the interchangeability of the various C routines, some of which use temporary variables while others don't: this means that these routines only behave identically for non-overlapping inputs. So let's decorate these input vectors with the __restrict modifier, which informs the compiler that there is no overlap. While at it, make the input-only vectors pointer-to-const as well. 
Tested-by: Nathan Chancellor Signed-off-by: Ard Biesheuvel Reviewed-by: Nick Desaulniers Link: https://github.com/ClangBuiltLinux/linux/issues/563 Signed-off-by: Herbert Xu --- arch/alpha/include/asm/xor.h | 53 ++++++++++------ arch/arm/include/asm/xor.h | 42 ++++++++----- arch/arm64/include/asm/xor.h | 21 ++++--- arch/arm64/lib/xor-neon.c | 46 ++++++++------ arch/ia64/include/asm/xor.h | 21 ++++--- arch/powerpc/include/asm/xor_altivec.h | 25 ++++---- arch/powerpc/lib/xor_vmx.c | 28 ++++++--- arch/powerpc/lib/xor_vmx.h | 27 +++++---- arch/powerpc/lib/xor_vmx_glue.c | 32 +++++----- arch/s390/lib/xor.c | 21 ++++--- arch/sparc/include/asm/xor_32.h | 21 ++++--- arch/sparc/include/asm/xor_64.h | 42 ++++++++----- arch/x86/include/asm/xor.h | 42 ++++++++----- arch/x86/include/asm/xor_32.h | 42 ++++++++----- arch/x86/include/asm/xor_avx.h | 21 ++++--- include/asm-generic/xor.h | 84 +++++++++++++++++--------- include/linux/raid/xor.h | 21 ++++--- 17 files changed, 381 insertions(+), 208 deletions(-) diff --git a/arch/alpha/include/asm/xor.h b/arch/alpha/include/asm/xor.h index 5aeb4fb3cb7cb..e0de0c233ab92 100644 --- a/arch/alpha/include/asm/xor.h +++ b/arch/alpha/include/asm/xor.h @@ -5,24 +5,43 @@ * Optimized RAID-5 checksumming functions for alpha EV5 and EV6 */ -extern void xor_alpha_2(unsigned long, unsigned long *, unsigned long *); -extern void xor_alpha_3(unsigned long, unsigned long *, unsigned long *, - unsigned long *); -extern void xor_alpha_4(unsigned long, unsigned long *, unsigned long *, - unsigned long *, unsigned long *); -extern void xor_alpha_5(unsigned long, unsigned long *, unsigned long *, - unsigned long *, unsigned long *, unsigned long *); +extern void +xor_alpha_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2); +extern void +xor_alpha_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3); +extern void +xor_alpha_4(unsigned long 
bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4); +extern void +xor_alpha_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5); -extern void xor_alpha_prefetch_2(unsigned long, unsigned long *, - unsigned long *); -extern void xor_alpha_prefetch_3(unsigned long, unsigned long *, - unsigned long *, unsigned long *); -extern void xor_alpha_prefetch_4(unsigned long, unsigned long *, - unsigned long *, unsigned long *, - unsigned long *); -extern void xor_alpha_prefetch_5(unsigned long, unsigned long *, - unsigned long *, unsigned long *, - unsigned long *, unsigned long *); +extern void +xor_alpha_prefetch_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2); +extern void +xor_alpha_prefetch_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3); +extern void +xor_alpha_prefetch_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4); +extern void +xor_alpha_prefetch_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5); asm(" \n\ .text \n\ diff --git a/arch/arm/include/asm/xor.h b/arch/arm/include/asm/xor.h index aefddec79286a..669cad5194d3d 100644 --- a/arch/arm/include/asm/xor.h +++ b/arch/arm/include/asm/xor.h @@ -44,7 +44,8 @@ : "0" (dst), "r" (a1), "r" (a2), "r" (a3), "r" (a4)) static void -xor_arm4regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +xor_arm4regs_2(unsigned long bytes, unsigned long * 
__restrict p1, + const unsigned long * __restrict p2) { unsigned int lines = bytes / sizeof(unsigned long) / 4; register unsigned int a1 __asm__("r4"); @@ -64,8 +65,9 @@ xor_arm4regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) } static void -xor_arm4regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) +xor_arm4regs_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { unsigned int lines = bytes / sizeof(unsigned long) / 4; register unsigned int a1 __asm__("r4"); @@ -86,8 +88,10 @@ xor_arm4regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_arm4regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) +xor_arm4regs_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { unsigned int lines = bytes / sizeof(unsigned long) / 2; register unsigned int a1 __asm__("r8"); @@ -105,8 +109,11 @@ xor_arm4regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_arm4regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) +xor_arm4regs_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { unsigned int lines = bytes / sizeof(unsigned long) / 2; register unsigned int a1 __asm__("r8"); @@ -146,7 +153,8 @@ static struct xor_block_template xor_block_arm4regs = { extern struct xor_block_template const xor_block_neon_inner; static void -xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +xor_neon_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { if 
(in_interrupt()) { xor_arm4regs_2(bytes, p1, p2); @@ -158,8 +166,9 @@ xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) } static void -xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) +xor_neon_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { if (in_interrupt()) { xor_arm4regs_3(bytes, p1, p2, p3); @@ -171,8 +180,10 @@ xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) +xor_neon_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { if (in_interrupt()) { xor_arm4regs_4(bytes, p1, p2, p3, p4); @@ -184,8 +195,11 @@ xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_neon_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) +xor_neon_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { if (in_interrupt()) { xor_arm4regs_5(bytes, p1, p2, p3, p4, p5); diff --git a/arch/arm64/include/asm/xor.h b/arch/arm64/include/asm/xor.h index 947f6a4f1aa0a..befcd8a7abc98 100644 --- a/arch/arm64/include/asm/xor.h +++ b/arch/arm64/include/asm/xor.h @@ -16,7 +16,8 @@ extern struct xor_block_template const xor_block_inner_neon; static void -xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +xor_neon_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { kernel_neon_begin(); xor_block_inner_neon.do_2(bytes, p1, p2); @@ -24,8 +25,9 @@ xor_neon_2(unsigned long bytes, 
unsigned long *p1, unsigned long *p2) } static void -xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) +xor_neon_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { kernel_neon_begin(); xor_block_inner_neon.do_3(bytes, p1, p2, p3); @@ -33,8 +35,10 @@ xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) +xor_neon_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { kernel_neon_begin(); xor_block_inner_neon.do_4(bytes, p1, p2, p3, p4); @@ -42,8 +46,11 @@ xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_neon_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) +xor_neon_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { kernel_neon_begin(); xor_block_inner_neon.do_5(bytes, p1, p2, p3, p4, p5); diff --git a/arch/arm64/lib/xor-neon.c b/arch/arm64/lib/xor-neon.c index d189cf4e70ea6..96b171995d198 100644 --- a/arch/arm64/lib/xor-neon.c +++ b/arch/arm64/lib/xor-neon.c @@ -10,8 +10,8 @@ #include #include -void xor_arm64_neon_2(unsigned long bytes, unsigned long *p1, - unsigned long *p2) +void xor_arm64_neon_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { uint64_t *dp1 = (uint64_t *)p1; uint64_t *dp2 = (uint64_t *)p2; @@ -37,8 +37,9 @@ void xor_arm64_neon_2(unsigned long bytes, unsigned long *p1, } while (--lines > 0); } -void xor_arm64_neon_3(unsigned long bytes, unsigned long *p1, - 
unsigned long *p2, unsigned long *p3) +void xor_arm64_neon_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { uint64_t *dp1 = (uint64_t *)p1; uint64_t *dp2 = (uint64_t *)p2; @@ -72,8 +73,10 @@ void xor_arm64_neon_3(unsigned long bytes, unsigned long *p1, } while (--lines > 0); } -void xor_arm64_neon_4(unsigned long bytes, unsigned long *p1, - unsigned long *p2, unsigned long *p3, unsigned long *p4) +void xor_arm64_neon_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { uint64_t *dp1 = (uint64_t *)p1; uint64_t *dp2 = (uint64_t *)p2; @@ -115,9 +118,11 @@ void xor_arm64_neon_4(unsigned long bytes, unsigned long *p1, } while (--lines > 0); } -void xor_arm64_neon_5(unsigned long bytes, unsigned long *p1, - unsigned long *p2, unsigned long *p3, - unsigned long *p4, unsigned long *p5) +void xor_arm64_neon_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { uint64_t *dp1 = (uint64_t *)p1; uint64_t *dp2 = (uint64_t *)p2; @@ -186,8 +191,10 @@ static inline uint64x2_t eor3(uint64x2_t p, uint64x2_t q, uint64x2_t r) return res; } -static void xor_arm64_eor3_3(unsigned long bytes, unsigned long *p1, - unsigned long *p2, unsigned long *p3) +static void xor_arm64_eor3_3(unsigned long bytes, + unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { uint64_t *dp1 = (uint64_t *)p1; uint64_t *dp2 = (uint64_t *)p2; @@ -219,9 +226,11 @@ static void xor_arm64_eor3_3(unsigned long bytes, unsigned long *p1, } while (--lines > 0); } -static void xor_arm64_eor3_4(unsigned long bytes, unsigned long *p1, - unsigned long *p2, unsigned long *p3, - unsigned long *p4) +static void 
xor_arm64_eor3_4(unsigned long bytes, + unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { uint64_t *dp1 = (uint64_t *)p1; uint64_t *dp2 = (uint64_t *)p2; @@ -261,9 +270,12 @@ static void xor_arm64_eor3_4(unsigned long bytes, unsigned long *p1, } while (--lines > 0); } -static void xor_arm64_eor3_5(unsigned long bytes, unsigned long *p1, - unsigned long *p2, unsigned long *p3, - unsigned long *p4, unsigned long *p5) +static void xor_arm64_eor3_5(unsigned long bytes, + unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { uint64_t *dp1 = (uint64_t *)p1; uint64_t *dp2 = (uint64_t *)p2; diff --git a/arch/ia64/include/asm/xor.h b/arch/ia64/include/asm/xor.h index 673051bf9d7da..6785f70d3208b 100644 --- a/arch/ia64/include/asm/xor.h +++ b/arch/ia64/include/asm/xor.h @@ -4,13 +4,20 @@ */ -extern void xor_ia64_2(unsigned long, unsigned long *, unsigned long *); -extern void xor_ia64_3(unsigned long, unsigned long *, unsigned long *, - unsigned long *); -extern void xor_ia64_4(unsigned long, unsigned long *, unsigned long *, - unsigned long *, unsigned long *); -extern void xor_ia64_5(unsigned long, unsigned long *, unsigned long *, - unsigned long *, unsigned long *, unsigned long *); +extern void xor_ia64_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2); +extern void xor_ia64_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3); +extern void xor_ia64_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4); +extern void xor_ia64_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * 
__restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5); static struct xor_block_template xor_block_ia64 = { .name = "ia64", diff --git a/arch/powerpc/include/asm/xor_altivec.h b/arch/powerpc/include/asm/xor_altivec.h index 6ca923510b597..294620a25f802 100644 --- a/arch/powerpc/include/asm/xor_altivec.h +++ b/arch/powerpc/include/asm/xor_altivec.h @@ -3,17 +3,20 @@ #define _ASM_POWERPC_XOR_ALTIVEC_H #ifdef CONFIG_ALTIVEC - -void xor_altivec_2(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in); -void xor_altivec_3(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in); -void xor_altivec_4(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in, - unsigned long *v4_in); -void xor_altivec_5(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in, - unsigned long *v4_in, unsigned long *v5_in); +void xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2); +void xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3); +void xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4); +void xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5); #endif #endif /* _ASM_POWERPC_XOR_ALTIVEC_H */ diff --git a/arch/powerpc/lib/xor_vmx.c b/arch/powerpc/lib/xor_vmx.c index 54e61979e80e5..aab49d056d188 100644 --- a/arch/powerpc/lib/xor_vmx.c +++ b/arch/powerpc/lib/xor_vmx.c @@ -49,8 +49,9 @@ typedef vector signed char unative_t; V1##_3 = vec_xor(V1##_3, V2##_3); \ } while 
(0) -void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in) +void __xor_altivec_2(unsigned long bytes, + unsigned long * __restrict v1_in, + const unsigned long * __restrict v2_in) { DEFINE(v1); DEFINE(v2); @@ -67,8 +68,10 @@ void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in, } while (--lines > 0); } -void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in) +void __xor_altivec_3(unsigned long bytes, + unsigned long * __restrict v1_in, + const unsigned long * __restrict v2_in, + const unsigned long * __restrict v3_in) { DEFINE(v1); DEFINE(v2); @@ -89,9 +92,11 @@ void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in, } while (--lines > 0); } -void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in, - unsigned long *v4_in) +void __xor_altivec_4(unsigned long bytes, + unsigned long * __restrict v1_in, + const unsigned long * __restrict v2_in, + const unsigned long * __restrict v3_in, + const unsigned long * __restrict v4_in) { DEFINE(v1); DEFINE(v2); @@ -116,9 +121,12 @@ void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in, } while (--lines > 0); } -void __xor_altivec_5(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in, - unsigned long *v4_in, unsigned long *v5_in) +void __xor_altivec_5(unsigned long bytes, + unsigned long * __restrict v1_in, + const unsigned long * __restrict v2_in, + const unsigned long * __restrict v3_in, + const unsigned long * __restrict v4_in, + const unsigned long * __restrict v5_in) { DEFINE(v1); DEFINE(v2); diff --git a/arch/powerpc/lib/xor_vmx.h b/arch/powerpc/lib/xor_vmx.h index 5c2b0839b1794..573c41d90dac5 100644 --- a/arch/powerpc/lib/xor_vmx.h +++ b/arch/powerpc/lib/xor_vmx.h @@ -6,16 +6,17 @@ * outside of the enable/disable altivec block. 
*/ -void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in); - -void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in); - -void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in, - unsigned long *v4_in); - -void __xor_altivec_5(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in, - unsigned long *v4_in, unsigned long *v5_in); +void __xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2); +void __xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3); +void __xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4); +void __xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5); diff --git a/arch/powerpc/lib/xor_vmx_glue.c b/arch/powerpc/lib/xor_vmx_glue.c index 80dba916c3674..35d917ece4d1e 100644 --- a/arch/powerpc/lib/xor_vmx_glue.c +++ b/arch/powerpc/lib/xor_vmx_glue.c @@ -12,47 +12,51 @@ #include #include "xor_vmx.h" -void xor_altivec_2(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in) +void xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { preempt_disable(); enable_kernel_altivec(); - __xor_altivec_2(bytes, v1_in, v2_in); + __xor_altivec_2(bytes, p1, p2); disable_kernel_altivec(); preempt_enable(); } EXPORT_SYMBOL(xor_altivec_2); -void xor_altivec_3(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in) +void xor_altivec_3(unsigned long bytes, unsigned long 
* __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { preempt_disable(); enable_kernel_altivec(); - __xor_altivec_3(bytes, v1_in, v2_in, v3_in); + __xor_altivec_3(bytes, p1, p2, p3); disable_kernel_altivec(); preempt_enable(); } EXPORT_SYMBOL(xor_altivec_3); -void xor_altivec_4(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in, - unsigned long *v4_in) +void xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { preempt_disable(); enable_kernel_altivec(); - __xor_altivec_4(bytes, v1_in, v2_in, v3_in, v4_in); + __xor_altivec_4(bytes, p1, p2, p3, p4); disable_kernel_altivec(); preempt_enable(); } EXPORT_SYMBOL(xor_altivec_4); -void xor_altivec_5(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in, - unsigned long *v4_in, unsigned long *v5_in) +void xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { preempt_disable(); enable_kernel_altivec(); - __xor_altivec_5(bytes, v1_in, v2_in, v3_in, v4_in, v5_in); + __xor_altivec_5(bytes, p1, p2, p3, p4, p5); disable_kernel_altivec(); preempt_enable(); } diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c index a963c3d8ad0d9..fb924a8041dc7 100644 --- a/arch/s390/lib/xor.c +++ b/arch/s390/lib/xor.c @@ -11,7 +11,8 @@ #include #include -static void xor_xc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +static void xor_xc_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { asm volatile( " larl 1,2f\n" @@ -32,8 +33,9 @@ static void xor_xc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) : "0", "1", "cc", "memory"); } -static void xor_xc_3(unsigned long 
bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) +static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { asm volatile( " larl 1,2f\n" @@ -58,8 +60,10 @@ static void xor_xc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, : : "0", "1", "cc", "memory"); } -static void xor_xc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) +static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { asm volatile( " larl 1,2f\n" @@ -88,8 +92,11 @@ static void xor_xc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, : : "0", "1", "cc", "memory"); } -static void xor_xc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) +static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { asm volatile( " larl 1,2f\n" diff --git a/arch/sparc/include/asm/xor_32.h b/arch/sparc/include/asm/xor_32.h index 3e5af37e4b9cd..0351813cf3af5 100644 --- a/arch/sparc/include/asm/xor_32.h +++ b/arch/sparc/include/asm/xor_32.h @@ -13,7 +13,8 @@ */ static void -sparc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +sparc_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { int lines = bytes / (sizeof (long)) / 8; @@ -50,8 +51,9 @@ sparc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) } static void -sparc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) +sparc_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const 
unsigned long * __restrict p3) { int lines = bytes / (sizeof (long)) / 8; @@ -101,8 +103,10 @@ sparc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -sparc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) +sparc_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { int lines = bytes / (sizeof (long)) / 8; @@ -165,8 +169,11 @@ sparc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -sparc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) +sparc_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { int lines = bytes / (sizeof (long)) / 8; diff --git a/arch/sparc/include/asm/xor_64.h b/arch/sparc/include/asm/xor_64.h index 16169f3edcd5b..caaddea8ad79d 100644 --- a/arch/sparc/include/asm/xor_64.h +++ b/arch/sparc/include/asm/xor_64.h @@ -12,13 +12,20 @@ #include -void xor_vis_2(unsigned long, unsigned long *, unsigned long *); -void xor_vis_3(unsigned long, unsigned long *, unsigned long *, - unsigned long *); -void xor_vis_4(unsigned long, unsigned long *, unsigned long *, - unsigned long *, unsigned long *); -void xor_vis_5(unsigned long, unsigned long *, unsigned long *, - unsigned long *, unsigned long *, unsigned long *); +void xor_vis_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2); +void xor_vis_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3); +void xor_vis_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + 
const unsigned long * __restrict p4); +void xor_vis_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5); /* XXX Ugh, write cheetah versions... -DaveM */ @@ -30,13 +37,20 @@ static struct xor_block_template xor_block_VIS = { .do_5 = xor_vis_5, }; -void xor_niagara_2(unsigned long, unsigned long *, unsigned long *); -void xor_niagara_3(unsigned long, unsigned long *, unsigned long *, - unsigned long *); -void xor_niagara_4(unsigned long, unsigned long *, unsigned long *, - unsigned long *, unsigned long *); -void xor_niagara_5(unsigned long, unsigned long *, unsigned long *, - unsigned long *, unsigned long *, unsigned long *); +void xor_niagara_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2); +void xor_niagara_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3); +void xor_niagara_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4); +void xor_niagara_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5); static struct xor_block_template xor_block_niagara = { .name = "Niagara", diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h index 2ee95a7769e60..7b0307acc4103 100644 --- a/arch/x86/include/asm/xor.h +++ b/arch/x86/include/asm/xor.h @@ -57,7 +57,8 @@ op(i + 3, 3) static void -xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +xor_sse_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { unsigned long lines = bytes >> 8; @@ -108,7 +109,8 @@ 
xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) } static void -xor_sse_2_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2) +xor_sse_2_pf64(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { unsigned long lines = bytes >> 8; @@ -142,8 +144,9 @@ xor_sse_2_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2) } static void -xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) +xor_sse_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { unsigned long lines = bytes >> 8; @@ -201,8 +204,9 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_sse_3_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) +xor_sse_3_pf64(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { unsigned long lines = bytes >> 8; @@ -238,8 +242,10 @@ xor_sse_3_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) +xor_sse_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { unsigned long lines = bytes >> 8; @@ -304,8 +310,10 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_sse_4_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) +xor_sse_4_pf64(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { unsigned long lines = bytes >> 8; @@ -343,8 +351,11 @@ xor_sse_4_pf64(unsigned long bytes, unsigned 
long *p1, unsigned long *p2, } static void -xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) +xor_sse_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { unsigned long lines = bytes >> 8; @@ -416,8 +427,11 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_sse_5_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) +xor_sse_5_pf64(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { unsigned long lines = bytes >> 8; diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h index 67ceb790e6397..7a6b9474591e7 100644 --- a/arch/x86/include/asm/xor_32.h +++ b/arch/x86/include/asm/xor_32.h @@ -21,7 +21,8 @@ #include static void -xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +xor_pII_mmx_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { unsigned long lines = bytes >> 7; @@ -64,8 +65,9 @@ xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) } static void -xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) +xor_pII_mmx_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { unsigned long lines = bytes >> 7; @@ -113,8 +115,10 @@ xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) +xor_pII_mmx_4(unsigned 
long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { unsigned long lines = bytes >> 7; @@ -168,8 +172,11 @@ xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, static void -xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) +xor_pII_mmx_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { unsigned long lines = bytes >> 7; @@ -248,7 +255,8 @@ xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, #undef BLOCK static void -xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +xor_p5_mmx_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { unsigned long lines = bytes >> 6; @@ -295,8 +303,9 @@ xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) } static void -xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) +xor_p5_mmx_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { unsigned long lines = bytes >> 6; @@ -352,8 +361,10 @@ xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) +xor_p5_mmx_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { unsigned long lines = bytes >> 6; @@ -418,8 +429,11 @@ xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_p5_mmx_5(unsigned long bytes, unsigned 
long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) +xor_p5_mmx_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { unsigned long lines = bytes >> 6; diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h index 0c4e5b5e3852b..7f81dd5897f41 100644 --- a/arch/x86/include/asm/xor_avx.h +++ b/arch/x86/include/asm/xor_avx.h @@ -26,7 +26,8 @@ BLOCK4(8) \ BLOCK4(12) -static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1) +static void xor_avx_2(unsigned long bytes, unsigned long * __restrict p0, + const unsigned long * __restrict p1) { unsigned long lines = bytes >> 9; @@ -52,8 +53,9 @@ do { \ kernel_fpu_end(); } -static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1, - unsigned long *p2) +static void xor_avx_3(unsigned long bytes, unsigned long * __restrict p0, + const unsigned long * __restrict p1, + const unsigned long * __restrict p2) { unsigned long lines = bytes >> 9; @@ -82,8 +84,10 @@ do { \ kernel_fpu_end(); } -static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1, - unsigned long *p2, unsigned long *p3) +static void xor_avx_4(unsigned long bytes, unsigned long * __restrict p0, + const unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { unsigned long lines = bytes >> 9; @@ -115,8 +119,11 @@ do { \ kernel_fpu_end(); } -static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1, - unsigned long *p2, unsigned long *p3, unsigned long *p4) +static void xor_avx_5(unsigned long bytes, unsigned long * __restrict p0, + const unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { unsigned long lines = bytes >> 
9; diff --git a/include/asm-generic/xor.h b/include/asm-generic/xor.h index b62a2a56a4d49..44509d48fca21 100644 --- a/include/asm-generic/xor.h +++ b/include/asm-generic/xor.h @@ -8,7 +8,8 @@ #include static void -xor_8regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +xor_8regs_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { long lines = bytes / (sizeof (long)) / 8; @@ -27,8 +28,9 @@ xor_8regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) } static void -xor_8regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) +xor_8regs_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { long lines = bytes / (sizeof (long)) / 8; @@ -48,8 +50,10 @@ xor_8regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_8regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) +xor_8regs_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { long lines = bytes / (sizeof (long)) / 8; @@ -70,8 +74,11 @@ xor_8regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_8regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) +xor_8regs_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { long lines = bytes / (sizeof (long)) / 8; @@ -93,7 +100,8 @@ xor_8regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_32regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +xor_32regs_2(unsigned long bytes, unsigned long * __restrict 
p1, + const unsigned long * __restrict p2) { long lines = bytes / (sizeof (long)) / 8; @@ -129,8 +137,9 @@ xor_32regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) } static void -xor_32regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) +xor_32regs_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { long lines = bytes / (sizeof (long)) / 8; @@ -175,8 +184,10 @@ xor_32regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_32regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) +xor_32regs_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { long lines = bytes / (sizeof (long)) / 8; @@ -230,8 +241,11 @@ xor_32regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_32regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) +xor_32regs_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { long lines = bytes / (sizeof (long)) / 8; @@ -294,7 +308,8 @@ xor_32regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_8regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +xor_8regs_p_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { long lines = bytes / (sizeof (long)) / 8 - 1; prefetchw(p1); @@ -320,8 +335,9 @@ xor_8regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) } static void -xor_8regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) 
+xor_8regs_p_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { long lines = bytes / (sizeof (long)) / 8 - 1; prefetchw(p1); @@ -350,8 +366,10 @@ xor_8regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_8regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) +xor_8regs_p_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { long lines = bytes / (sizeof (long)) / 8 - 1; @@ -384,8 +402,11 @@ xor_8regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_8regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) +xor_8regs_p_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { long lines = bytes / (sizeof (long)) / 8 - 1; @@ -421,7 +442,8 @@ xor_8regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_32regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +xor_32regs_p_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { long lines = bytes / (sizeof (long)) / 8 - 1; @@ -466,8 +488,9 @@ xor_32regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) } static void -xor_32regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) +xor_32regs_p_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { long lines = bytes / (sizeof (long)) / 8 - 1; @@ -523,8 +546,10 @@ xor_32regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long 
*p2, } static void -xor_32regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) +xor_32regs_p_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { long lines = bytes / (sizeof (long)) / 8 - 1; @@ -591,8 +616,11 @@ xor_32regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_32regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) +xor_32regs_p_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { long lines = bytes / (sizeof (long)) / 8 - 1; diff --git a/include/linux/raid/xor.h b/include/linux/raid/xor.h index 2a9fee8ddae3f..51b811b623224 100644 --- a/include/linux/raid/xor.h +++ b/include/linux/raid/xor.h @@ -11,13 +11,20 @@ struct xor_block_template { struct xor_block_template *next; const char *name; int speed; - void (*do_2)(unsigned long, unsigned long *, unsigned long *); - void (*do_3)(unsigned long, unsigned long *, unsigned long *, - unsigned long *); - void (*do_4)(unsigned long, unsigned long *, unsigned long *, - unsigned long *, unsigned long *); - void (*do_5)(unsigned long, unsigned long *, unsigned long *, - unsigned long *, unsigned long *, unsigned long *); + void (*do_2)(unsigned long, unsigned long * __restrict, + const unsigned long * __restrict); + void (*do_3)(unsigned long, unsigned long * __restrict, + const unsigned long * __restrict, + const unsigned long * __restrict); + void (*do_4)(unsigned long, unsigned long * __restrict, + const unsigned long * __restrict, + const unsigned long * __restrict, + const unsigned long * __restrict); + void (*do_5)(unsigned long, unsigned long * __restrict, + const unsigned 
long * __restrict, + const unsigned long * __restrict, + const unsigned long * __restrict, + const unsigned long * __restrict); }; #endif -- GitLab From a69cb445f7d129abf7c50d48c8a8eca7c8d5df15 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 5 Feb 2022 16:23:46 +0100 Subject: [PATCH 0346/1586] crypto: arm/xor - make vectorized C code Clang-friendly The ARM version of the accelerated XOR routines are simply the 8-way C routines passed through the auto-vectorizer with SIMD codegen enabled. This used to require GCC version 4.6 at least, but given that 5.1 is now the baseline, this check is no longer necessary, and actually misidentifies Clang as GCC < 4.6 as Clang defines the GCC major/minor as well, but makes no attempt at doing this in a way that conveys feature parity with a certain version of GCC (which would not be a great idea in the first place). So let's drop the version check, and make the auto-vectorize pragma (which is based on a GCC-specific command line option) GCC-only. Since Clang performs SIMD auto-vectorization by default at -O2, no pragma is necessary here. Tested-by: Nathan Chancellor Signed-off-by: Ard Biesheuvel Reviewed-by: Nick Desaulniers Link: https://github.com/ClangBuiltLinux/linux/issues/496 Link: https://github.com/ClangBuiltLinux/linux/issues/503 Signed-off-by: Herbert Xu --- arch/arm/lib/xor-neon.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/arch/arm/lib/xor-neon.c b/arch/arm/lib/xor-neon.c index b99dd8e1c93f1..522510baed490 100644 --- a/arch/arm/lib/xor-neon.c +++ b/arch/arm/lib/xor-neon.c @@ -17,17 +17,11 @@ MODULE_LICENSE("GPL"); /* * Pull in the reference implementations while instructing GCC (through * -ftree-vectorize) to attempt to exploit implicit parallelism and emit - * NEON instructions. + * NEON instructions. Clang does this by default at O2 so no pragma is + * needed. 
 */ -#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6) +#ifdef CONFIG_CC_IS_GCC #pragma GCC optimize "tree-vectorize" -#else -/* - * While older versions of GCC do not generate incorrect code, they fail to - * recognize the parallel nature of these functions, and emit plain ARM code, - * which is known to be slower than the optimized ARM code in asm-arm/xor.h. - */ -#warning This code requires at least version 4.6 of GCC #endif #pragma GCC diagnostic ignored "-Wunused-variable" -- GitLab From d5869fdc189f0f12a954a48d58a48104a2f5d044 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Thu, 10 Feb 2022 14:52:22 -0800 Subject: [PATCH 0347/1586] block: introduce block_rq_error tracepoint Currently, rasdaemon uses the existing tracepoint block_rq_complete and filters out non-error cases in order to capture block disk errors. But there are a few problems with this approach: 1. Even though the kernel trace filter could do the filtering work, there is still some overhead after we enable this tracepoint. 2. The filter is merely based on errno, which does not align with the kernel logic that checks the errors for print_req_error(). 3. block_rq_complete only provides dev major and minor to identify the block device, which is not convenient to use in user-space. So introduce a new tracepoint block_rq_error just for the error case. With this patch, rasdaemon could switch to block_rq_error. Since the new tracepoint has a similar implementation to block_rq_complete, move the existing code from TRACE_EVENT block_rq_complete() into a new event class block_rq_completion(). Then add events for block_rq_complete and block_rq_error respectively from the newly created event class, per the suggestion from Chaitanya Kulkarni.
Cc: Jens Axboe Cc: Christoph Hellwig Reviewed-by: Steven Rostedt Signed-off-by: Cong Wang Signed-off-by: Chaitanya Kulkarni Signed-off-by: Yang Shi Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220210225222.260069-1-shy828301@gmail.com Signed-off-by: Jens Axboe --- block/blk-mq.c | 4 ++- include/trace/events/block.h | 49 ++++++++++++++++++++++++++---------- 2 files changed, 39 insertions(+), 14 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 4b868e792ba4a..6c59ffe765fde 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -789,8 +789,10 @@ bool blk_update_request(struct request *req, blk_status_t error, #endif if (unlikely(error && !blk_rq_is_passthrough(req) && - !(req->rq_flags & RQF_QUIET))) + !(req->rq_flags & RQF_QUIET))) { blk_print_req_error(req, error); + trace_block_rq_error(req, error, nr_bytes); + } blk_account_io_completion(req, nr_bytes); diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 27170e40e8c95..7f4dfbdf12a6f 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -100,19 +100,7 @@ TRACE_EVENT(block_rq_requeue, __entry->nr_sector, 0) ); -/** - * block_rq_complete - block IO operation completed by device driver - * @rq: block operations request - * @error: status code - * @nr_bytes: number of completed bytes - * - * The block_rq_complete tracepoint event indicates that some portion - * of operation request has been completed by the device driver. If - * the @rq->bio is %NULL, then there is absolutely no additional work to - * do for the request. If @rq->bio is non-NULL then there is - * additional work required to complete the request. 
- */ -TRACE_EVENT(block_rq_complete, +DECLARE_EVENT_CLASS(block_rq_completion, TP_PROTO(struct request *rq, blk_status_t error, unsigned int nr_bytes), @@ -144,6 +132,41 @@ TRACE_EVENT(block_rq_complete, __entry->nr_sector, __entry->error) ); +/** + * block_rq_complete - block IO operation completed by device driver + * @rq: block operations request + * @error: status code + * @nr_bytes: number of completed bytes + * + * The block_rq_complete tracepoint event indicates that some portion + * of operation request has been completed by the device driver. If + * the @rq->bio is %NULL, then there is absolutely no additional work to + * do for the request. If @rq->bio is non-NULL then there is + * additional work required to complete the request. + */ +DEFINE_EVENT(block_rq_completion, block_rq_complete, + + TP_PROTO(struct request *rq, blk_status_t error, unsigned int nr_bytes), + + TP_ARGS(rq, error, nr_bytes) +); + +/** + * block_rq_error - block IO operation error reported by device driver + * @rq: block operations request + * @error: status code + * @nr_bytes: number of completed bytes + * + * The block_rq_error tracepoint event indicates that some portion + * of operation request has failed as reported by the device driver. + */ +DEFINE_EVENT(block_rq_completion, block_rq_error, + + TP_PROTO(struct request *rq, blk_status_t error, unsigned int nr_bytes), + + TP_ARGS(rq, error, nr_bytes) +); + DECLARE_EVENT_CLASS(block_rq, TP_PROTO(struct request *rq), -- GitLab From 0e51e2ab49a99bc5077760aa083dfa1c3bf9899b Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 11 Feb 2022 18:11:47 +0800 Subject: [PATCH 0348/1586] block: remove THROTL_IOPS_MAX No one uses THROTL_IOPS_MAX any more, so remove it. 
Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220211101149.2368042-2-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index b4de2010fba55..bdc49bd4eef02 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -28,8 +28,6 @@ /* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */ #define BLKG_STAT_CPU_BATCH (INT_MAX / 2) -/* Max limits for throttle policy */ -#define THROTL_IOPS_MAX UINT_MAX #define FC_APPID_LEN 129 -- GitLab From 472e4314c039d6cf36e28783b1c63f87b5b394c2 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 11 Feb 2022 18:11:48 +0800 Subject: [PATCH 0349/1586] block: move initialization of q->blkg_list into blkcg_init_queue q->blkg_list is only used by blkcg code, so move it into blkcg_init_queue. Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20220211101149.2368042-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 2 ++ block/blk-core.c | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 671debbae9413..35deaceba1f07 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1175,6 +1175,8 @@ int blkcg_init_queue(struct request_queue *q) bool preloaded; int ret; + INIT_LIST_HEAD(&q->blkg_list); + new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL); if (!new_blkg) return -ENOMEM; diff --git a/block/blk-core.c b/block/blk-core.c index be8812f5489d4..ff972b968f253 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -476,9 +476,6 @@ struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu) timer_setup(&q->timeout, blk_rq_timed_out_timer, 0); INIT_WORK(&q->timeout_work, blk_timeout_work); INIT_LIST_HEAD(&q->icq_list); -#ifdef CONFIG_BLK_CGROUP - INIT_LIST_HEAD(&q->blkg_list); 
-#endif kobject_init(&q->kobj, &blk_queue_ktype); -- GitLab From 672fdcf0e7de3b1e39416ac85abf178f023271f1 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 11 Feb 2022 18:11:49 +0800 Subject: [PATCH 0350/1586] block: partition include/linux/blk-cgroup.h Partition include/linux/blk-cgroup.h into two parts: one is public part, the other is block layer private part. Suggested by Christoph Hellwig. Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220211101149.2368042-4-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/bfq-iosched.h | 1 - block/bio.c | 2 +- block/blk-cgroup-rwstat.h | 2 +- block/blk-cgroup.c | 2 +- block/blk-cgroup.h | 477 ++++++++++++++++++++++++++++++++++++ block/blk-core.c | 2 +- block/blk-crypto-fallback.c | 2 +- block/blk-iocost.c | 2 +- block/blk-iolatency.c | 2 +- block/blk-ioprio.c | 2 +- block/blk-sysfs.c | 2 +- block/blk-throttle.c | 1 - block/bounce.c | 2 +- block/elevator.c | 2 +- include/linux/blk-cgroup.h | 459 +--------------------------------- 15 files changed, 493 insertions(+), 467 deletions(-) create mode 100644 block/blk-cgroup.h diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 07288b9da3895..72255ec44f8f8 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -8,7 +8,6 @@ #include #include -#include #include "blk-cgroup-rwstat.h" diff --git a/block/bio.c b/block/bio.c index 18d34b33351b8..b15f5466ce084 100644 --- a/block/bio.c +++ b/block/bio.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -24,6 +23,7 @@ #include #include "blk.h" #include "blk-rq-qos.h" +#include "blk-cgroup.h" struct bio_alloc_cache { struct bio *free_list; diff --git a/block/blk-cgroup-rwstat.h b/block/blk-cgroup-rwstat.h index ee746919c41fc..9f2723b34b759 100644 --- a/block/blk-cgroup-rwstat.h +++ b/block/blk-cgroup-rwstat.h @@ -6,7 +6,7 @@ #ifndef _BLK_CGROUP_RWSTAT_H #define _BLK_CGROUP_RWSTAT_H -#include +#include "blk-cgroup.h" enum blkg_rwstat_type { 
BLKG_RWSTAT_READ, diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 35deaceba1f07..4108d445c73af 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -26,11 +26,11 @@ #include #include #include -#include #include #include #include #include "blk.h" +#include "blk-cgroup.h" #include "blk-ioprio.h" #include "blk-throttle.h" diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h new file mode 100644 index 0000000000000..3e91803c4a555 --- /dev/null +++ b/block/blk-cgroup.h @@ -0,0 +1,477 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BLK_CGROUP_PRIVATE_H +#define _BLK_CGROUP_PRIVATE_H +/* + * block cgroup private header + * + * Based on ideas and code from CFQ, CFS and BFQ: + * Copyright (C) 2003 Jens Axboe + * + * Copyright (C) 2008 Fabio Checconi + * Paolo Valente + * + * Copyright (C) 2009 Vivek Goyal + * Nauman Rafique + */ + +#include + +/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */ +#define BLKG_STAT_CPU_BATCH (INT_MAX / 2) + +#ifdef CONFIG_BLK_CGROUP + +/* + * A blkcg_gq (blkg) is association between a block cgroup (blkcg) and a + * request_queue (q). This is used by blkcg policies which need to track + * information per blkcg - q pair. + * + * There can be multiple active blkcg policies and each blkg:policy pair is + * represented by a blkg_policy_data which is allocated and freed by each + * policy's pd_alloc/free_fn() methods. A policy can allocate private data + * area by allocating larger data structure which embeds blkg_policy_data + * at the beginning. + */ +struct blkg_policy_data { + /* the blkg and policy id this per-policy data belongs to */ + struct blkcg_gq *blkg; + int plid; +}; + +/* + * Policies that need to keep per-blkcg data which is independent from any + * request_queue associated to it should implement cpd_alloc/free_fn() + * methods. A policy can allocate private data area by allocating larger + * data structure which embeds blkcg_policy_data at the beginning. 
+ * cpd_init() is invoked to let each policy handle per-blkcg data. + */ +struct blkcg_policy_data { + /* the blkcg and policy id this per-policy data belongs to */ + struct blkcg *blkcg; + int plid; +}; + +typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp); +typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd); +typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd); +typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd); +typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp, + struct request_queue *q, struct blkcg *blkcg); +typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd); +typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd); +typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd); +typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd); +typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd); +typedef bool (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd, + struct seq_file *s); + +struct blkcg_policy { + int plid; + /* cgroup files for the policy */ + struct cftype *dfl_cftypes; + struct cftype *legacy_cftypes; + + /* operations */ + blkcg_pol_alloc_cpd_fn *cpd_alloc_fn; + blkcg_pol_init_cpd_fn *cpd_init_fn; + blkcg_pol_free_cpd_fn *cpd_free_fn; + blkcg_pol_bind_cpd_fn *cpd_bind_fn; + + blkcg_pol_alloc_pd_fn *pd_alloc_fn; + blkcg_pol_init_pd_fn *pd_init_fn; + blkcg_pol_online_pd_fn *pd_online_fn; + blkcg_pol_offline_pd_fn *pd_offline_fn; + blkcg_pol_free_pd_fn *pd_free_fn; + blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn; + blkcg_pol_stat_pd_fn *pd_stat_fn; +}; + +extern struct blkcg blkcg_root; +extern bool blkcg_debug_stats; + +struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg, + struct request_queue *q, bool update_hint); +int blkcg_init_queue(struct request_queue *q); +void blkcg_exit_queue(struct request_queue *q); + +/* Blkio controller policy registration */ +int blkcg_policy_register(struct 
blkcg_policy *pol); +void blkcg_policy_unregister(struct blkcg_policy *pol); +int blkcg_activate_policy(struct request_queue *q, + const struct blkcg_policy *pol); +void blkcg_deactivate_policy(struct request_queue *q, + const struct blkcg_policy *pol); + +const char *blkg_dev_name(struct blkcg_gq *blkg); +void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, + u64 (*prfill)(struct seq_file *, + struct blkg_policy_data *, int), + const struct blkcg_policy *pol, int data, + bool show_total); +u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v); + +struct blkg_conf_ctx { + struct block_device *bdev; + struct blkcg_gq *blkg; + char *body; +}; + +struct block_device *blkcg_conf_open_bdev(char **inputp); +int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, + char *input, struct blkg_conf_ctx *ctx); +void blkg_conf_finish(struct blkg_conf_ctx *ctx); + +/** + * blkcg_css - find the current css + * + * Find the css associated with either the kthread or the current task. + * This may return a dying css, so it is up to the caller to use tryget logic + * to confirm it is alive and well. + */ +static inline struct cgroup_subsys_state *blkcg_css(void) +{ + struct cgroup_subsys_state *css; + + css = kthread_blkcg(); + if (css) + return css; + return task_css(current, io_cgrp_id); +} + +/** + * __bio_blkcg - internal, inconsistent version to get blkcg + * + * DO NOT USE. + * This function is inconsistent and consequently is dangerous to use. The + * first part of the function returns a blkcg where a reference is owned by the + * bio. This means it does not need to be rcu protected as it cannot go away + * with the bio owning a reference to it. However, the latter potentially gets + * it from task_css(). This can race against task migration and the cgroup + * dying. It is also semantically different as it must be called rcu protected + * and is susceptible to failure when trying to get a reference to it. 
+ * Therefore, it is not ok to assume that *_get() will always succeed on the + * blkcg returned here. + */ +static inline struct blkcg *__bio_blkcg(struct bio *bio) +{ + if (bio && bio->bi_blkg) + return bio->bi_blkg->blkcg; + return css_to_blkcg(blkcg_css()); +} + +/** + * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg + * @return: true if this bio needs to be submitted with the root blkg context. + * + * In order to avoid priority inversions we sometimes need to issue a bio as if + * it were attached to the root blkg, and then backcharge to the actual owning + * blkg. The idea is we do bio_blkcg() to look up the actual context for the + * bio and attach the appropriate blkg to the bio. Then we call this helper and + * if it is true run with the root blkg for that queue and then do any + * backcharging to the originating cgroup once the io is complete. + */ +static inline bool bio_issue_as_root_blkg(struct bio *bio) +{ + return (bio->bi_opf & (REQ_META | REQ_SWAP)) != 0; +} + +/** + * __blkg_lookup - internal version of blkg_lookup() + * @blkcg: blkcg of interest + * @q: request_queue of interest + * @update_hint: whether to update lookup hint with the result or not + * + * This is internal version and shouldn't be used by policy + * implementations. Looks up blkgs for the @blkcg - @q pair regardless of + * @q's bypass state. If @update_hint is %true, the caller should be + * holding @q->queue_lock and lookup hint is updated on success. 
+ */ +static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, + struct request_queue *q, + bool update_hint) +{ + struct blkcg_gq *blkg; + + if (blkcg == &blkcg_root) + return q->root_blkg; + + blkg = rcu_dereference(blkcg->blkg_hint); + if (blkg && blkg->q == q) + return blkg; + + return blkg_lookup_slowpath(blkcg, q, update_hint); +} + +/** + * blkg_lookup - lookup blkg for the specified blkcg - q pair + * @blkcg: blkcg of interest + * @q: request_queue of interest + * + * Lookup blkg for the @blkcg - @q pair. This function should be called + * under RCU read lock. + */ +static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, + struct request_queue *q) +{ + WARN_ON_ONCE(!rcu_read_lock_held()); + return __blkg_lookup(blkcg, q, false); +} + +/** + * blk_queue_root_blkg - return blkg for the (blkcg_root, @q) pair + * @q: request_queue of interest + * + * Lookup blkg for @q at the root level. See also blkg_lookup(). + */ +static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q) +{ + return q->root_blkg; +} + +/** + * blkg_to_pdata - get policy private data + * @blkg: blkg of interest + * @pol: policy of interest + * + * Return pointer to private data associated with the @blkg-@pol pair. + */ +static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, + struct blkcg_policy *pol) +{ + return blkg ? blkg->pd[pol->plid] : NULL; +} + +static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg, + struct blkcg_policy *pol) +{ + return blkcg ? blkcg->cpd[pol->plid] : NULL; +} + +/** + * pdata_to_blkg - get blkg associated with policy private data + * @pd: policy private data of interest + * + * @pd is policy private data. Determine the blkg it's associated with. + */ +static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) +{ + return pd ? pd->blkg : NULL; +} + +static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd) +{ + return cpd ? 
cpd->blkcg : NULL; +} + +/** + * blkg_path - format cgroup path of blkg + * @blkg: blkg of interest + * @buf: target buffer + * @buflen: target buffer length + * + * Format the path of the cgroup of @blkg into @buf. + */ +static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen) +{ + return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen); +} + +/** + * blkg_get - get a blkg reference + * @blkg: blkg to get + * + * The caller should be holding an existing reference. + */ +static inline void blkg_get(struct blkcg_gq *blkg) +{ + percpu_ref_get(&blkg->refcnt); +} + +/** + * blkg_tryget - try and get a blkg reference + * @blkg: blkg to get + * + * This is for use when doing an RCU lookup of the blkg. We may be in the midst + * of freeing this blkg, so we can only use it if the refcnt is not zero. + */ +static inline bool blkg_tryget(struct blkcg_gq *blkg) +{ + return blkg && percpu_ref_tryget(&blkg->refcnt); +} + +/** + * blkg_put - put a blkg reference + * @blkg: blkg to put + */ +static inline void blkg_put(struct blkcg_gq *blkg) +{ + percpu_ref_put(&blkg->refcnt); +} + +/** + * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants + * @d_blkg: loop cursor pointing to the current descendant + * @pos_css: used for iteration + * @p_blkg: target blkg to walk descendants of + * + * Walk @c_blkg through the descendants of @p_blkg. Must be used with RCU + * read locked. If called under either blkcg or queue lock, the iteration + * is guaranteed to include all and only online blkgs. The caller may + * update @pos_css by calling css_rightmost_descendant() to skip subtree. + * @p_blkg is included in the iteration and the first node to be visited. 
+ */ +#define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg) \ + css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css) \ + if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \ + (p_blkg)->q, false))) + +/** + * blkg_for_each_descendant_post - post-order walk of a blkg's descendants + * @d_blkg: loop cursor pointing to the current descendant + * @pos_css: used for iteration + * @p_blkg: target blkg to walk descendants of + * + * Similar to blkg_for_each_descendant_pre() but performs post-order + * traversal instead. Synchronization rules are the same. @p_blkg is + * included in the iteration and the last node to be visited. + */ +#define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg) \ + css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css) \ + if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \ + (p_blkg)->q, false))) + +bool __blkcg_punt_bio_submit(struct bio *bio); + +static inline bool blkcg_punt_bio_submit(struct bio *bio) +{ + if (bio->bi_opf & REQ_CGROUP_PUNT) + return __blkcg_punt_bio_submit(bio); + else + return false; +} + +static inline void blkcg_bio_issue_init(struct bio *bio) +{ + bio_issue_init(&bio->bi_issue, bio_sectors(bio)); +} + +static inline void blkcg_use_delay(struct blkcg_gq *blkg) +{ + if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0)) + return; + if (atomic_add_return(1, &blkg->use_delay) == 1) + atomic_inc(&blkg->blkcg->css.cgroup->congestion_count); +} + +static inline int blkcg_unuse_delay(struct blkcg_gq *blkg) +{ + int old = atomic_read(&blkg->use_delay); + + if (WARN_ON_ONCE(old < 0)) + return 0; + if (old == 0) + return 0; + + /* + * We do this song and dance because we can race with somebody else + * adding or removing delay. If we just did an atomic_dec we'd end up + * negative and we'd already be in trouble. We need to subtract 1 and + * then check to see if we were the last delay so we can drop the + * congestion count on the cgroup. 
+ */ + while (old) { + int cur = atomic_cmpxchg(&blkg->use_delay, old, old - 1); + if (cur == old) + break; + old = cur; + } + + if (old == 0) + return 0; + if (old == 1) + atomic_dec(&blkg->blkcg->css.cgroup->congestion_count); + return 1; +} + +/** + * blkcg_set_delay - Enable allocator delay mechanism with the specified delay amount + * @blkg: target blkg + * @delay: delay duration in nsecs + * + * When enabled with this function, the delay is not decayed and must be + * explicitly cleared with blkcg_clear_delay(). Must not be mixed with + * blkcg_[un]use_delay() and blkcg_add_delay() usages. + */ +static inline void blkcg_set_delay(struct blkcg_gq *blkg, u64 delay) +{ + int old = atomic_read(&blkg->use_delay); + + /* We only want 1 person setting the congestion count for this blkg. */ + if (!old && atomic_cmpxchg(&blkg->use_delay, old, -1) == old) + atomic_inc(&blkg->blkcg->css.cgroup->congestion_count); + + atomic64_set(&blkg->delay_nsec, delay); +} + +/** + * blkcg_clear_delay - Disable allocator delay mechanism + * @blkg: target blkg + * + * Disable use_delay mechanism. See blkcg_set_delay(). + */ +static inline void blkcg_clear_delay(struct blkcg_gq *blkg) +{ + int old = atomic_read(&blkg->use_delay); + + /* We only want 1 person clearing the congestion count for this blkg. 
*/ + if (old && atomic_cmpxchg(&blkg->use_delay, old, 0) == old) + atomic_dec(&blkg->blkcg->css.cgroup->congestion_count); +} + +void blk_cgroup_bio_start(struct bio *bio); +void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta); +#else /* CONFIG_BLK_CGROUP */ + +struct blkg_policy_data { +}; + +struct blkcg_policy_data { +}; + +struct blkcg_policy { +}; + +#ifdef CONFIG_BLOCK + +static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; } +static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q) +{ return NULL; } +static inline int blkcg_init_queue(struct request_queue *q) { return 0; } +static inline void blkcg_exit_queue(struct request_queue *q) { } +static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; } +static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { } +static inline int blkcg_activate_policy(struct request_queue *q, + const struct blkcg_policy *pol) { return 0; } +static inline void blkcg_deactivate_policy(struct request_queue *q, + const struct blkcg_policy *pol) { } + +static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; } + +static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, + struct blkcg_policy *pol) { return NULL; } +static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; } +static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; } +static inline void blkg_get(struct blkcg_gq *blkg) { } +static inline void blkg_put(struct blkcg_gq *blkg) { } + +static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; } +static inline void blkcg_bio_issue_init(struct bio *bio) { } +static inline void blk_cgroup_bio_start(struct bio *bio) { } + +#define blk_queue_for_each_rl(rl, q) \ + for ((rl) = &(q)->root_rl; (rl); (rl) = NULL) + +#endif /* CONFIG_BLOCK */ +#endif /* CONFIG_BLK_CGROUP */ + +#endif /* _BLK_CGROUP_PRIVATE_H */ diff --git a/block/blk-core.c 
b/block/blk-core.c index ff972b968f253..5a4a590416298 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include @@ -49,6 +48,7 @@ #include "blk.h" #include "blk-mq-sched.h" #include "blk-pm.h" +#include "blk-cgroup.h" #include "blk-throttle.h" struct dentry *blk_debugfs_root; diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c index c87aba8584c64..18c8eafe20b94 100644 --- a/block/blk-crypto-fallback.c +++ b/block/blk-crypto-fallback.c @@ -10,7 +10,6 @@ #define pr_fmt(fmt) "blk-crypto-fallback: " fmt #include -#include #include #include #include @@ -20,6 +19,7 @@ #include #include +#include "blk-cgroup.h" #include "blk-crypto-internal.h" static unsigned int num_prealloc_bounce_pg = 32; diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 769b643942989..70a0a3d680a35 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -178,12 +178,12 @@ #include #include #include -#include #include #include #include "blk-rq-qos.h" #include "blk-stat.h" #include "blk-wbt.h" +#include "blk-cgroup.h" #ifdef CONFIG_TRACEPOINTS diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 6593c7123b97e..010e658d44a82 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -74,9 +74,9 @@ #include #include #include -#include #include "blk-rq-qos.h" #include "blk-stat.h" +#include "blk-cgroup.h" #include "blk.h" #define DEFAULT_SCALE_COOKIE 1000000U diff --git a/block/blk-ioprio.c b/block/blk-ioprio.c index 2e7f10e1c03fc..79e797f5d1949 100644 --- a/block/blk-ioprio.c +++ b/block/blk-ioprio.c @@ -12,11 +12,11 @@ * Documentation/admin-guide/cgroup-v2.rst. 
*/ -#include #include #include #include #include +#include "blk-cgroup.h" #include "blk-ioprio.h" #include "blk-rq-qos.h" diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 9f32882ceb2f6..4c6b7dff71e5b 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include "blk.h" @@ -18,6 +17,7 @@ #include "blk-mq-debugfs.h" #include "blk-mq-sched.h" #include "blk-wbt.h" +#include "blk-cgroup.h" #include "blk-throttle.h" struct queue_sysfs_entry { diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 7c462c006b269..73640d80e99ec 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -10,7 +10,6 @@ #include #include #include -#include #include "blk.h" #include "blk-cgroup-rwstat.h" #include "blk-stat.h" diff --git a/block/bounce.c b/block/bounce.c index 3fd3bc6fd5dbb..3d50d19cde72a 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -24,6 +23,7 @@ #include #include "blk.h" +#include "blk-cgroup.h" #define POOL_SIZE 64 #define ISA_POOL_SIZE 16 diff --git a/block/elevator.c b/block/elevator.c index ec98aed39c4f5..6847ab6e7aa50 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -35,7 +35,6 @@ #include #include #include -#include #include @@ -44,6 +43,7 @@ #include "blk-mq-sched.h" #include "blk-pm.h" #include "blk-wbt.h" +#include "blk-cgroup.h" static DEFINE_SPINLOCK(elv_list_lock); static LIST_HEAD(elv_list); diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index bdc49bd4eef02..f2ad8ed8f777c 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -25,12 +25,8 @@ #include #include -/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */ -#define BLKG_STAT_CPU_BATCH (INT_MAX / 2) - #define FC_APPID_LEN 129 - #ifdef CONFIG_BLK_CGROUP enum blkg_iostat_type { @@ -42,6 +38,7 @@ enum blkg_iostat_type { }; struct blkcg_gq; +struct blkg_policy_data; 
struct blkcg { struct cgroup_subsys_state css; @@ -74,36 +71,6 @@ struct blkg_iostat_set { struct blkg_iostat last; }; -/* - * A blkcg_gq (blkg) is association between a block cgroup (blkcg) and a - * request_queue (q). This is used by blkcg policies which need to track - * information per blkcg - q pair. - * - * There can be multiple active blkcg policies and each blkg:policy pair is - * represented by a blkg_policy_data which is allocated and freed by each - * policy's pd_alloc/free_fn() methods. A policy can allocate private data - * area by allocating larger data structure which embeds blkg_policy_data - * at the beginning. - */ -struct blkg_policy_data { - /* the blkg and policy id this per-policy data belongs to */ - struct blkcg_gq *blkg; - int plid; -}; - -/* - * Policies that need to keep per-blkcg data which is independent from any - * request_queue associated to it should implement cpd_alloc/free_fn() - * methods. A policy can allocate private data area by allocating larger - * data structure which embeds blkcg_policy_data at the beginning. - * cpd_init() is invoked to let each policy handle per-blkcg data. 
- */ -struct blkcg_policy_data { - /* the blkcg and policy id this per-policy data belongs to */ - struct blkcg *blkcg; - int plid; -}; - /* association between a blk cgroup and a request queue */ struct blkcg_gq { /* Pointer to the associated request_queue */ @@ -139,120 +106,17 @@ struct blkcg_gq { struct rcu_head rcu_head; }; -typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp); -typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd); -typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd); -typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd); -typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp, - struct request_queue *q, struct blkcg *blkcg); -typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd); -typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd); -typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd); -typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd); -typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd); -typedef bool (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd, - struct seq_file *s); - -struct blkcg_policy { - int plid; - /* cgroup files for the policy */ - struct cftype *dfl_cftypes; - struct cftype *legacy_cftypes; - - /* operations */ - blkcg_pol_alloc_cpd_fn *cpd_alloc_fn; - blkcg_pol_init_cpd_fn *cpd_init_fn; - blkcg_pol_free_cpd_fn *cpd_free_fn; - blkcg_pol_bind_cpd_fn *cpd_bind_fn; - - blkcg_pol_alloc_pd_fn *pd_alloc_fn; - blkcg_pol_init_pd_fn *pd_init_fn; - blkcg_pol_online_pd_fn *pd_online_fn; - blkcg_pol_offline_pd_fn *pd_offline_fn; - blkcg_pol_free_pd_fn *pd_free_fn; - blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn; - blkcg_pol_stat_pd_fn *pd_stat_fn; -}; - -extern struct blkcg blkcg_root; extern struct cgroup_subsys_state * const blkcg_root_css; -extern bool blkcg_debug_stats; - -struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg, - struct request_queue *q, bool update_hint); -int 
blkcg_init_queue(struct request_queue *q); -void blkcg_exit_queue(struct request_queue *q); - -/* Blkio controller policy registration */ -int blkcg_policy_register(struct blkcg_policy *pol); -void blkcg_policy_unregister(struct blkcg_policy *pol); -int blkcg_activate_policy(struct request_queue *q, - const struct blkcg_policy *pol); -void blkcg_deactivate_policy(struct request_queue *q, - const struct blkcg_policy *pol); - -const char *blkg_dev_name(struct blkcg_gq *blkg); -void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, - u64 (*prfill)(struct seq_file *, - struct blkg_policy_data *, int), - const struct blkcg_policy *pol, int data, - bool show_total); -u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v); - -struct blkg_conf_ctx { - struct block_device *bdev; - struct blkcg_gq *blkg; - char *body; -}; - -struct block_device *blkcg_conf_open_bdev(char **inputp); -int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, - char *input, struct blkg_conf_ctx *ctx); -void blkg_conf_finish(struct blkg_conf_ctx *ctx); -/** - * blkcg_css - find the current css - * - * Find the css associated with either the kthread or the current task. - * This may return a dying css, so it is up to the caller to use tryget logic - * to confirm it is alive and well. - */ -static inline struct cgroup_subsys_state *blkcg_css(void) -{ - struct cgroup_subsys_state *css; - - css = kthread_blkcg(); - if (css) - return css; - return task_css(current, io_cgrp_id); -} +void blkcg_destroy_blkgs(struct blkcg *blkcg); +void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay); +void blkcg_maybe_throttle_current(void); static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) { return css ? container_of(css, struct blkcg, css) : NULL; } -/** - * __bio_blkcg - internal, inconsistent version to get blkcg - * - * DO NOT USE. - * This function is inconsistent and consequently is dangerous to use. 
The - * first part of the function returns a blkcg where a reference is owned by the - * bio. This means it does not need to be rcu protected as it cannot go away - * with the bio owning a reference to it. However, the latter potentially gets - * it from task_css(). This can race against task migration and the cgroup - * dying. It is also semantically different as it must be called rcu protected - * and is susceptible to failure when trying to get a reference to it. - * Therefore, it is not ok to assume that *_get() will always succeed on the - * blkcg returned here. - */ -static inline struct blkcg *__bio_blkcg(struct bio *bio) -{ - if (bio && bio->bi_blkg) - return bio->bi_blkg->blkcg; - return css_to_blkcg(blkcg_css()); -} - /** * bio_blkcg - grab the blkcg associated with a bio * @bio: target bio @@ -288,22 +152,6 @@ static inline bool blk_cgroup_congested(void) return ret; } -/** - * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg - * @return: true if this bio needs to be submitted with the root blkg context. - * - * In order to avoid priority inversions we sometimes need to issue a bio as if - * it were attached to the root blkg, and then backcharge to the actual owning - * blkg. The idea is we do bio_blkcg() to look up the actual context for the - * bio and attach the appropriate blkg to the bio. Then we call this helper and - * if it is true run with the root blkg for that queue and then do any - * backcharging to the originating cgroup once the io is complete. 
- */ -static inline bool bio_issue_as_root_blkg(struct bio *bio) -{ - return (bio->bi_opf & (REQ_META | REQ_SWAP)) != 0; -} - /** * blkcg_parent - get the parent of a blkcg * @blkcg: blkcg of interest @@ -315,96 +163,6 @@ static inline struct blkcg *blkcg_parent(struct blkcg *blkcg) return css_to_blkcg(blkcg->css.parent); } -/** - * __blkg_lookup - internal version of blkg_lookup() - * @blkcg: blkcg of interest - * @q: request_queue of interest - * @update_hint: whether to update lookup hint with the result or not - * - * This is internal version and shouldn't be used by policy - * implementations. Looks up blkgs for the @blkcg - @q pair regardless of - * @q's bypass state. If @update_hint is %true, the caller should be - * holding @q->queue_lock and lookup hint is updated on success. - */ -static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, - struct request_queue *q, - bool update_hint) -{ - struct blkcg_gq *blkg; - - if (blkcg == &blkcg_root) - return q->root_blkg; - - blkg = rcu_dereference(blkcg->blkg_hint); - if (blkg && blkg->q == q) - return blkg; - - return blkg_lookup_slowpath(blkcg, q, update_hint); -} - -/** - * blkg_lookup - lookup blkg for the specified blkcg - q pair - * @blkcg: blkcg of interest - * @q: request_queue of interest - * - * Lookup blkg for the @blkcg - @q pair. This function should be called - * under RCU read lock. - */ -static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, - struct request_queue *q) -{ - WARN_ON_ONCE(!rcu_read_lock_held()); - return __blkg_lookup(blkcg, q, false); -} - -/** - * blk_queue_root_blkg - return blkg for the (blkcg_root, @q) pair - * @q: request_queue of interest - * - * Lookup blkg for @q at the root level. See also blkg_lookup(). 
- */ -static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q) -{ - return q->root_blkg; -} - -/** - * blkg_to_pdata - get policy private data - * @blkg: blkg of interest - * @pol: policy of interest - * - * Return pointer to private data associated with the @blkg-@pol pair. - */ -static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, - struct blkcg_policy *pol) -{ - return blkg ? blkg->pd[pol->plid] : NULL; -} - -static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg, - struct blkcg_policy *pol) -{ - return blkcg ? blkcg->cpd[pol->plid] : NULL; -} - -/** - * pdata_to_blkg - get blkg associated with policy private data - * @pd: policy private data of interest - * - * @pd is policy private data. Determine the blkg it's associated with. - */ -static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) -{ - return pd ? pd->blkg : NULL; -} - -static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd) -{ - return cpd ? cpd->blkcg : NULL; -} - -extern void blkcg_destroy_blkgs(struct blkcg *blkcg); - /** * blkcg_pin_online - pin online state * @blkcg: blkcg of interest @@ -437,231 +195,24 @@ static inline void blkcg_unpin_online(struct blkcg *blkcg) } while (blkcg); } -/** - * blkg_path - format cgroup path of blkg - * @blkg: blkg of interest - * @buf: target buffer - * @buflen: target buffer length - * - * Format the path of the cgroup of @blkg into @buf. - */ -static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen) -{ - return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen); -} - -/** - * blkg_get - get a blkg reference - * @blkg: blkg to get - * - * The caller should be holding an existing reference. - */ -static inline void blkg_get(struct blkcg_gq *blkg) -{ - percpu_ref_get(&blkg->refcnt); -} - -/** - * blkg_tryget - try and get a blkg reference - * @blkg: blkg to get - * - * This is for use when doing an RCU lookup of the blkg. 
We may be in the midst - * of freeing this blkg, so we can only use it if the refcnt is not zero. - */ -static inline bool blkg_tryget(struct blkcg_gq *blkg) -{ - return blkg && percpu_ref_tryget(&blkg->refcnt); -} - -/** - * blkg_put - put a blkg reference - * @blkg: blkg to put - */ -static inline void blkg_put(struct blkcg_gq *blkg) -{ - percpu_ref_put(&blkg->refcnt); -} - -/** - * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants - * @d_blkg: loop cursor pointing to the current descendant - * @pos_css: used for iteration - * @p_blkg: target blkg to walk descendants of - * - * Walk @c_blkg through the descendants of @p_blkg. Must be used with RCU - * read locked. If called under either blkcg or queue lock, the iteration - * is guaranteed to include all and only online blkgs. The caller may - * update @pos_css by calling css_rightmost_descendant() to skip subtree. - * @p_blkg is included in the iteration and the first node to be visited. - */ -#define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg) \ - css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css) \ - if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \ - (p_blkg)->q, false))) - -/** - * blkg_for_each_descendant_post - post-order walk of a blkg's descendants - * @d_blkg: loop cursor pointing to the current descendant - * @pos_css: used for iteration - * @p_blkg: target blkg to walk descendants of - * - * Similar to blkg_for_each_descendant_pre() but performs post-order - * traversal instead. Synchronization rules are the same. @p_blkg is - * included in the iteration and the last node to be visited. 
- */ -#define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg) \ - css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css) \ - if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \ - (p_blkg)->q, false))) - -bool __blkcg_punt_bio_submit(struct bio *bio); - -static inline bool blkcg_punt_bio_submit(struct bio *bio) -{ - if (bio->bi_opf & REQ_CGROUP_PUNT) - return __blkcg_punt_bio_submit(bio); - else - return false; -} - -static inline void blkcg_bio_issue_init(struct bio *bio) -{ - bio_issue_init(&bio->bi_issue, bio_sectors(bio)); -} - -static inline void blkcg_use_delay(struct blkcg_gq *blkg) -{ - if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0)) - return; - if (atomic_add_return(1, &blkg->use_delay) == 1) - atomic_inc(&blkg->blkcg->css.cgroup->congestion_count); -} - -static inline int blkcg_unuse_delay(struct blkcg_gq *blkg) -{ - int old = atomic_read(&blkg->use_delay); - - if (WARN_ON_ONCE(old < 0)) - return 0; - if (old == 0) - return 0; - - /* - * We do this song and dance because we can race with somebody else - * adding or removing delay. If we just did an atomic_dec we'd end up - * negative and we'd already be in trouble. We need to subtract 1 and - * then check to see if we were the last delay so we can drop the - * congestion count on the cgroup. - */ - while (old) { - int cur = atomic_cmpxchg(&blkg->use_delay, old, old - 1); - if (cur == old) - break; - old = cur; - } - - if (old == 0) - return 0; - if (old == 1) - atomic_dec(&blkg->blkcg->css.cgroup->congestion_count); - return 1; -} - -/** - * blkcg_set_delay - Enable allocator delay mechanism with the specified delay amount - * @blkg: target blkg - * @delay: delay duration in nsecs - * - * When enabled with this function, the delay is not decayed and must be - * explicitly cleared with blkcg_clear_delay(). Must not be mixed with - * blkcg_[un]use_delay() and blkcg_add_delay() usages. 
- */ -static inline void blkcg_set_delay(struct blkcg_gq *blkg, u64 delay) -{ - int old = atomic_read(&blkg->use_delay); - - /* We only want 1 person setting the congestion count for this blkg. */ - if (!old && atomic_cmpxchg(&blkg->use_delay, old, -1) == old) - atomic_inc(&blkg->blkcg->css.cgroup->congestion_count); - - atomic64_set(&blkg->delay_nsec, delay); -} - -/** - * blkcg_clear_delay - Disable allocator delay mechanism - * @blkg: target blkg - * - * Disable use_delay mechanism. See blkcg_set_delay(). - */ -static inline void blkcg_clear_delay(struct blkcg_gq *blkg) -{ - int old = atomic_read(&blkg->use_delay); - - /* We only want 1 person clearing the congestion count for this blkg. */ - if (old && atomic_cmpxchg(&blkg->use_delay, old, 0) == old) - atomic_dec(&blkg->blkcg->css.cgroup->congestion_count); -} - -void blk_cgroup_bio_start(struct bio *bio); -void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta); -void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay); -void blkcg_maybe_throttle_current(void); #else /* CONFIG_BLK_CGROUP */ struct blkcg { }; -struct blkg_policy_data { -}; - -struct blkcg_policy_data { -}; - struct blkcg_gq { }; -struct blkcg_policy { -}; - #define blkcg_root_css ((struct cgroup_subsys_state *)ERR_PTR(-EINVAL)) static inline void blkcg_maybe_throttle_current(void) { } static inline bool blk_cgroup_congested(void) { return false; } #ifdef CONFIG_BLOCK - static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { } - -static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; } -static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q) -{ return NULL; } -static inline int blkcg_init_queue(struct request_queue *q) { return 0; } -static inline void blkcg_exit_queue(struct request_queue *q) { } -static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; } -static inline void blkcg_policy_unregister(struct 
blkcg_policy *pol) { } -static inline int blkcg_activate_policy(struct request_queue *q, - const struct blkcg_policy *pol) { return 0; } -static inline void blkcg_deactivate_policy(struct request_queue *q, - const struct blkcg_policy *pol) { } - -static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; } static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; } +#endif /* CONFIG_BLOCK */ -static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, - struct blkcg_policy *pol) { return NULL; } -static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; } -static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; } -static inline void blkg_get(struct blkcg_gq *blkg) { } -static inline void blkg_put(struct blkcg_gq *blkg) { } - -static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; } -static inline void blkcg_bio_issue_init(struct bio *bio) { } -static inline void blk_cgroup_bio_start(struct bio *bio) { } - -#define blk_queue_for_each_rl(rl, q) \ - for ((rl) = &(q)->root_rl; (rl); (rl) = NULL) - -#endif /* CONFIG_BLOCK */ #endif /* CONFIG_BLK_CGROUP */ #ifdef CONFIG_BLK_CGROUP_FC_APPID -- GitLab From 2d03861e0d1d1ee81efc59338101cdd86a7474f6 Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Tue, 8 Feb 2022 13:48:39 -0800 Subject: [PATCH 0351/1586] selftests/sgx: Fix NULL-pointer-dereference upon early test failure == Background == The SGX selftests track parts of the enclave binaries in an array: encl->segment_tbl[]. That array is dynamically allocated early (but not first) in the test's lifetime. The array is referenced at the end of the test in encl_delete(). == Problem == encl->segment_tbl[] can be NULL if the test fails before its allocation. That leads to a NULL-pointer-dereference in encl_delete(). This is triggered during early failures of the selftest like if the enclave binary ("test_encl.elf") is deleted. 
== Solution == Ensure encl->segment_tbl[] is valid before attempting to access its members. The offset with which it is accessed, encl->nr_segments, is initialized before encl->segment_tbl[] and thus considered valid to use after the encl->segment_tbl[] check succeeds. Fixes: 3200505d4de6 ("selftests/sgx: Create a heap for the test enclave") Signed-off-by: Reinette Chatre Signed-off-by: Dave Hansen Acked-by: Shuah Khan Link: https://lkml.kernel.org/r/90a31dfd640ea756fa324712e7cbab4a90fa7518.1644355600.git.reinette.chatre@intel.com --- tools/testing/selftests/sgx/load.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c index 9d4322c946e2b..006b464c8fc94 100644 --- a/tools/testing/selftests/sgx/load.c +++ b/tools/testing/selftests/sgx/load.c @@ -21,7 +21,7 @@ void encl_delete(struct encl *encl) { - struct encl_segment *heap_seg = &encl->segment_tbl[encl->nr_segments - 1]; + struct encl_segment *heap_seg; if (encl->encl_base) munmap((void *)encl->encl_base, encl->encl_size); @@ -32,10 +32,11 @@ void encl_delete(struct encl *encl) if (encl->fd) close(encl->fd); - munmap(heap_seg->src, heap_seg->size); - - if (encl->segment_tbl) + if (encl->segment_tbl) { + heap_seg = &encl->segment_tbl[encl->nr_segments - 1]; + munmap(heap_seg->src, heap_seg->size); free(encl->segment_tbl); + } memset(encl, 0, sizeof(*encl)); } -- GitLab From fff36bcbfde1126f6b81cb8ee12a58aada17ca29 Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Tue, 8 Feb 2022 13:48:40 -0800 Subject: [PATCH 0352/1586] selftests/sgx: Do not attempt enclave build without valid enclave It is not possible to build an enclave if it was not possible to load the binary from which it should be constructed. Do not attempt to make further progress but instead return with failure. A "return false" from setup_test_encl() is expected to trip an ASSERT_TRUE() and abort the rest of the test. 
Fixes: 1b35eb719549 ("selftests/sgx: Encpsulate the test enclave creation") Signed-off-by: Reinette Chatre Signed-off-by: Dave Hansen Acked-by: Dave Hansen Acked-by: Shuah Khan Link: https://lkml.kernel.org/r/e3778c77f95e6dca348c732b12f155051d2899b4.1644355600.git.reinette.chatre@intel.com --- tools/testing/selftests/sgx/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c index 370c4995f7c4a..a7cd2c3e6f7e1 100644 --- a/tools/testing/selftests/sgx/main.c +++ b/tools/testing/selftests/sgx/main.c @@ -147,6 +147,7 @@ static bool setup_test_encl(unsigned long heap_size, struct encl *encl, if (!encl_load("test_encl.elf", encl, heap_size)) { encl_delete(encl); TH_LOG("Failed to load the test enclave.\n"); + return false; } if (!encl_measure(encl)) -- GitLab From 2db703fc3b15e7ef68c82eca613a3c00d43d70af Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Tue, 8 Feb 2022 13:48:41 -0800 Subject: [PATCH 0353/1586] selftests/sgx: Ensure enclave data available during debug print In support of debugging the SGX tests print details from the enclave and its memory mappings if any failure is encountered during enclave loading. When a failure is encountered no data is printed because the printing of the data is preceded by cleanup of the data. Move the data cleanup after the data print. 
Fixes: 147172148909 ("selftests/sgx: Dump segments and /proc/self/maps only on failure") Signed-off-by: Reinette Chatre Signed-off-by: Dave Hansen Acked-by: Shuah Khan Link: https://lkml.kernel.org/r/dab672f771e9b99e50c17ae2a75dc0b020cb0ce9.1644355600.git.reinette.chatre@intel.com --- tools/testing/selftests/sgx/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c index a7cd2c3e6f7e1..b0bd95a4730d5 100644 --- a/tools/testing/selftests/sgx/main.c +++ b/tools/testing/selftests/sgx/main.c @@ -186,8 +186,6 @@ static bool setup_test_encl(unsigned long heap_size, struct encl *encl, return true; err: - encl_delete(encl); - for (i = 0; i < encl->nr_segments; i++) { seg = &encl->segment_tbl[i]; @@ -208,6 +206,8 @@ err: TH_LOG("Failed to initialize the test enclave.\n"); + encl_delete(encl); + return false; } -- GitLab From 5626de65f97ae152e6dafdc528a36c1cbb7146ee Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Tue, 8 Feb 2022 13:48:42 -0800 Subject: [PATCH 0354/1586] selftests/sgx: Remove extra newlines in test output The TH_LOG() macro is an optional debug logging function made available by kselftest itself. When TH_LOG_ENABLED is set it prints the provided message with additional information and formatting that already includes a newline. Providing a newline to the message printed by TH_LOG() results in a double newline that produces irregular test output. Remove the unnecessary newlines from the text provided to TH_LOG(). 
Fixes: 1b35eb719549 ("selftests/sgx: Encpsulate the test enclave creation") Signed-off-by: Reinette Chatre Signed-off-by: Dave Hansen Acked-by: Dave Hansen Acked-by: Shuah Khan Link: https://lkml.kernel.org/r/6fd171ba622aed172a7c5b129d34d50bd0482f24.1644355600.git.reinette.chatre@intel.com --- tools/testing/selftests/sgx/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c index b0bd95a4730d5..dd74fa42302e0 100644 --- a/tools/testing/selftests/sgx/main.c +++ b/tools/testing/selftests/sgx/main.c @@ -146,7 +146,7 @@ static bool setup_test_encl(unsigned long heap_size, struct encl *encl, if (!encl_load("test_encl.elf", encl, heap_size)) { encl_delete(encl); - TH_LOG("Failed to load the test enclave.\n"); + TH_LOG("Failed to load the test enclave."); return false; } @@ -204,7 +204,7 @@ err: fclose(maps_file); } - TH_LOG("Failed to initialize the test enclave.\n"); + TH_LOG("Failed to initialize the test enclave."); encl_delete(encl); -- GitLab From 2e2f0199a20780463945e1cbffb3a191fa84bd9d Mon Sep 17 00:00:00 2001 From: Barry Song <21cnbao@gmail.com> Date: Mon, 7 Feb 2022 15:49:31 +0800 Subject: [PATCH 0355/1586] docs: block: biodoc.rst: Drop the obsolete and incorrect content Since commit 7eaceaccab5f ("block: remove per-queue plugging"), kernel has removed blk_run_address_space(), blk_unplug() and sync_buffer(), and moved to on-stack plugging. The document has been obsolete for years. Given that there are no obvious counterparts in the new mechanism to replace the old APIs, this patch drops the content directly.
Signed-off-by: Barry Song Link: https://lore.kernel.org/r/20220207074931.20067-1-song.bao.hua@hisilicon.com Signed-off-by: Jens Axboe --- Documentation/block/biodoc.rst | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/Documentation/block/biodoc.rst b/Documentation/block/biodoc.rst index 4fbc367e62f95..fdebc0fd8468f 100644 --- a/Documentation/block/biodoc.rst +++ b/Documentation/block/biodoc.rst @@ -965,11 +965,7 @@ till it fills up with a few more requests, before starting to service the requests. This provides an opportunity to merge/sort the requests before passing them down to the device. There are various conditions when the queue is unplugged (to open up the flow again), either through a scheduled task or -could be on demand. For example wait_on_buffer sets the unplugging going -through sync_buffer() running blk_run_address_space(mapping). Or the caller -can do it explicity through blk_unplug(bdev). So in the read case, -the queue gets explicitly unplugged as part of waiting for completion on that -buffer. +could be on demand. Aside: This is kind of controversial territory, as it's not clear if plugging is -- GitLab From 5f117033243488a0080f837540c27999aa31870e Mon Sep 17 00:00:00 2001 From: Marco Bonelli Date: Fri, 11 Feb 2022 17:23:50 +0100 Subject: [PATCH 0356/1586] x86/head64: Add missing __head annotation to sme_postprocess_startup() This function was previously part of __startup_64() which is marked __head, and is currently only called from there. Mark it __head too. 
Signed-off-by: Marco Bonelli Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220211162350.11780-1-marco@mebeim.net --- arch/x86/kernel/head64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index de563db9cdcd2..4f5ecbbaae77c 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -126,7 +126,7 @@ static bool __head check_la57_support(unsigned long physaddr) } #endif -static unsigned long sme_postprocess_startup(struct boot_params *bp, pmdval_t *pmd) +static unsigned long __head sme_postprocess_startup(struct boot_params *bp, pmdval_t *pmd) { unsigned long vaddr, vaddr_end; int i; -- GitLab From 47c8ebcce85ed7113e9e3e3f1d8c6374fa87848e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 27 Jan 2022 13:31:43 -0800 Subject: [PATCH 0357/1586] f2fs: add a way to limit roll forward recovery time This adds a sysfs entry to call checkpoint during fsync() in order to avoid long elapsed time to run roll-forward recovery when booting the device. Default value doesn't enforce the limitation which is same as before. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 6 ++++++ fs/f2fs/checkpoint.c | 1 + fs/f2fs/debug.c | 3 +++ fs/f2fs/f2fs.h | 3 +++ fs/f2fs/node.c | 2 ++ fs/f2fs/node.h | 3 +++ fs/f2fs/recovery.c | 4 ++++ fs/f2fs/super.c | 14 ++++++++++++-- fs/f2fs/sysfs.c | 2 ++ 9 files changed, 36 insertions(+), 2 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 7b50bf82f14dd..58bf0dc83712d 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -568,3 +568,9 @@ Contact: "Daeho Jeong" Description: You can set the trial count limit for GC urgent high mode with this value. If GC thread gets to the limit, the mode will turn back to GC normal mode. By default, the value is zero, which means there is no limit like before. 
+ +What: /sys/fs/f2fs//max_roll_forward_node_blocks +Date: January 2022 +Contact: "Jaegeuk Kim" +Description: Controls max # of node block writes to be used for roll forward + recovery. This can limit the roll forward recovery time. diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index a13b6b4af220a..203a1577942d3 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1547,6 +1547,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* update user_block_counts */ sbi->last_valid_block_count = sbi->total_valid_block_count; percpu_counter_set(&sbi->alloc_valid_block_count, 0); + percpu_counter_set(&sbi->rf_node_block_count, 0); /* Here, we have one bio having CP pack except cp pack 2 page */ f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 8c50518475a99..9a13902c77026 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -532,6 +532,9 @@ static int stat_show(struct seq_file *s, void *v) si->ndirty_meta, si->meta_pages); seq_printf(s, " - imeta: %4d\n", si->ndirty_imeta); + seq_printf(s, " - fsync mark: %4lld\n", + percpu_counter_sum_positive( + &si->sbi->rf_node_block_count)); seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n", si->dirty_nats, si->nats, si->dirty_sits, si->sits); seq_printf(s, " - free_nids: %9d/%9d\n - alloc_nids: %9d\n", diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3b4bf1c3f1ed6..c9515c3c54fd5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -917,6 +917,7 @@ struct f2fs_nm_info { nid_t max_nid; /* maximum possible node ids */ nid_t available_nids; /* # of available node ids */ nid_t next_scan_nid; /* the next nid to be scanned */ + nid_t max_rf_node_blocks; /* max # of nodes for recovery */ unsigned int ram_thresh; /* control the memory footprint */ unsigned int ra_nid_pages; /* # of nid pages to be readaheaded */ unsigned int dirty_nats_ratio; /* control dirty nats ratio threshold */ @@ -1688,6 +1689,8 @@ struct f2fs_sb_info { 
atomic_t nr_pages[NR_COUNT_TYPE]; /* # of allocated blocks */ struct percpu_counter alloc_valid_block_count; + /* # of node block writes as roll forward recovery */ + struct percpu_counter rf_node_block_count; /* writeback control */ atomic_t wb_sync_req[META]; /* count # of WB_SYNC threads */ diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 93512f8859d5d..0d98834575797 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1782,6 +1782,7 @@ continue_unlock: if (!atomic || page == last_page) { set_fsync_mark(page, 1); + percpu_counter_inc(&sbi->rf_node_block_count); if (IS_INODE(page)) { if (is_inode_flag_set(inode, FI_DIRTY_INODE)) @@ -3218,6 +3219,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi) nm_i->ram_thresh = DEF_RAM_THRESHOLD; nm_i->ra_nid_pages = DEF_RA_NID_PAGES; nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD; + nm_i->max_rf_node_blocks = DEF_RF_NODE_BLOCKS; INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC); INIT_LIST_HEAD(&nm_i->free_nid_list); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 18b98cf0465b8..4c1d34bfea781 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -31,6 +31,9 @@ /* control total # of nats */ #define DEF_NAT_CACHE_THRESHOLD 100000 +/* control total # of node writes used for roll-fowrad recovery */ +#define DEF_RF_NODE_BLOCKS 0 + /* vector size for gang look-up from nat cache that consists of radix tree */ #define NATVEC_SIZE 64 #define SETVEC_SIZE 32 diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 2af503f75b4fa..ab33e474af070 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -56,6 +56,10 @@ bool f2fs_space_for_roll_forward(struct f2fs_sb_info *sbi) if (sbi->last_valid_block_count + nalloc > sbi->user_block_count) return false; + if (NM_I(sbi)->max_rf_node_blocks && + percpu_counter_sum_positive(&sbi->rf_node_block_count) >= + NM_I(sbi)->max_rf_node_blocks) + return false; return true; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 806836184ebc9..f816d7d1987d9 100644 --- 
a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1501,8 +1501,9 @@ static void f2fs_free_inode(struct inode *inode) static void destroy_percpu_info(struct f2fs_sb_info *sbi) { - percpu_counter_destroy(&sbi->alloc_valid_block_count); percpu_counter_destroy(&sbi->total_valid_inode_count); + percpu_counter_destroy(&sbi->rf_node_block_count); + percpu_counter_destroy(&sbi->alloc_valid_block_count); } static void destroy_device_list(struct f2fs_sb_info *sbi) @@ -3619,11 +3620,20 @@ static int init_percpu_info(struct f2fs_sb_info *sbi) if (err) return err; + err = percpu_counter_init(&sbi->rf_node_block_count, 0, GFP_KERNEL); + if (err) + goto err_valid_block; + err = percpu_counter_init(&sbi->total_valid_inode_count, 0, GFP_KERNEL); if (err) - percpu_counter_destroy(&sbi->alloc_valid_block_count); + goto err_node_block; + return 0; +err_node_block: + percpu_counter_destroy(&sbi->rf_node_block_count); +err_valid_block: + percpu_counter_destroy(&sbi->alloc_valid_block_count); return err; } diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 281bc0133ee6f..47efcf233afdd 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -732,6 +732,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio); +F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, max_roll_forward_node_blocks, max_rf_node_blocks); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, migration_granularity, migration_granularity); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); @@ -855,6 +856,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(ram_thresh), ATTR_LIST(ra_nid_pages), ATTR_LIST(dirty_nats_ratio), + ATTR_LIST(max_roll_forward_node_blocks), ATTR_LIST(cp_interval), ATTR_LIST(idle_interval), ATTR_LIST(discard_idle_interval), -- 
GitLab From 984fc4e76d63345499f01c0c198a4b44860cf027 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 4 Feb 2022 13:24:56 +0800 Subject: [PATCH 0358/1586] f2fs: support idmapped mounts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch enables idmapped mounts for f2fs, since all dedicated helpers for this functionality exist, so, in this patch we just pass down the user_namespace argument from the VFS methods to the relevant helpers. Simple idmap example on f2fs image: 1. truncate -s 128M f2fs.img 2. mkfs.f2fs f2fs.img 3. mount f2fs.img /mnt/f2fs/ 4. touch /mnt/f2fs/file 5. ls -ln /mnt/f2fs/ total 0 -rw-r--r-- 1 0 0 0 2月 4 13:17 file 6. ./mount-idmapped --map-mount b:0:1001:1 /mnt/f2fs/ /mnt/scratch_f2fs/ 7. ls -ln /mnt/scratch_f2fs/ total 0 -rw-r--r-- 1 1001 1001 0 2月 4 13:17 file Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.c | 21 ++++++++++++--------- fs/f2fs/file.c | 23 ++++++++++++++--------- fs/f2fs/namei.c | 41 +++++++++++++++++++++++------------------ fs/f2fs/super.c | 2 +- 4 files changed, 50 insertions(+), 37 deletions(-) diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 16e826e01f095..eaa240b21f071 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -204,8 +204,9 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type, bool rcu) return __f2fs_get_acl(inode, type, NULL); } -static int f2fs_acl_update_mode(struct inode *inode, umode_t *mode_p, - struct posix_acl **acl) +static int f2fs_acl_update_mode(struct user_namespace *mnt_userns, + struct inode *inode, umode_t *mode_p, + struct posix_acl **acl) { umode_t mode = inode->i_mode; int error; @@ -218,14 +219,15 @@ static int f2fs_acl_update_mode(struct inode *inode, umode_t *mode_p, return error; if (error == 0) *acl = NULL; - if (!in_group_p(i_gid_into_mnt(&init_user_ns, inode)) && - !capable_wrt_inode_uidgid(&init_user_ns, inode, CAP_FSETID)) + if (!in_group_p(i_gid_into_mnt(mnt_userns, inode)) && + 
!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID)) mode &= ~S_ISGID; *mode_p = mode; return 0; } -static int __f2fs_set_acl(struct inode *inode, int type, +static int __f2fs_set_acl(struct user_namespace *mnt_userns, + struct inode *inode, int type, struct posix_acl *acl, struct page *ipage) { int name_index; @@ -238,7 +240,8 @@ static int __f2fs_set_acl(struct inode *inode, int type, case ACL_TYPE_ACCESS: name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; if (acl && !ipage) { - error = f2fs_acl_update_mode(inode, &mode, &acl); + error = f2fs_acl_update_mode(mnt_userns, inode, + &mode, &acl); if (error) return error; set_acl_inode(inode, mode); @@ -279,7 +282,7 @@ int f2fs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) return -EIO; - return __f2fs_set_acl(inode, type, acl, NULL); + return __f2fs_set_acl(mnt_userns, inode, type, acl, NULL); } /* @@ -419,7 +422,7 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, f2fs_mark_inode_dirty_sync(inode, true); if (default_acl) { - error = __f2fs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl, + error = __f2fs_set_acl(NULL, inode, ACL_TYPE_DEFAULT, default_acl, ipage); posix_acl_release(default_acl); } else { @@ -427,7 +430,7 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, } if (acl) { if (!error) - error = __f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl, + error = __f2fs_set_acl(NULL, inode, ACL_TYPE_ACCESS, acl, ipage); posix_acl_release(acl); } else { diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 42fbdcf0ccc9b..cfdc41f87f5de 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -844,7 +844,7 @@ int f2fs_getattr(struct user_namespace *mnt_userns, const struct path *path, STATX_ATTR_NODUMP | STATX_ATTR_VERITY); - generic_fillattr(&init_user_ns, inode, stat); + generic_fillattr(mnt_userns, inode, stat); /* we need to show initial sectors used for inline_data/dentries */ if ((S_ISREG(inode->i_mode) && 
f2fs_has_inline_data(inode)) || @@ -904,7 +904,7 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, !f2fs_is_compress_backend_ready(inode)) return -EOPNOTSUPP; - err = setattr_prepare(&init_user_ns, dentry, attr); + err = setattr_prepare(mnt_userns, dentry, attr); if (err) return err; @@ -980,10 +980,10 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, spin_unlock(&F2FS_I(inode)->i_size_lock); } - __setattr_copy(&init_user_ns, inode, attr); + __setattr_copy(mnt_userns, inode, attr); if (attr->ia_valid & ATTR_MODE) { - err = posix_acl_chmod(&init_user_ns, inode, f2fs_get_inode_mode(inode)); + err = posix_acl_chmod(mnt_userns, inode, f2fs_get_inode_mode(inode)); if (is_inode_flag_set(inode, FI_ACL_MODE)) { if (!err) @@ -1989,11 +1989,12 @@ static int f2fs_ioc_getversion(struct file *filp, unsigned long arg) static int f2fs_ioc_start_atomic_write(struct file *filp) { struct inode *inode = file_inode(filp); + struct user_namespace *mnt_userns = file_mnt_user_ns(filp); struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int ret; - if (!inode_owner_or_capable(&init_user_ns, inode)) + if (!inode_owner_or_capable(mnt_userns, inode)) return -EACCES; if (!S_ISREG(inode->i_mode)) @@ -2058,9 +2059,10 @@ out: static int f2fs_ioc_commit_atomic_write(struct file *filp) { struct inode *inode = file_inode(filp); + struct user_namespace *mnt_userns = file_mnt_user_ns(filp); int ret; - if (!inode_owner_or_capable(&init_user_ns, inode)) + if (!inode_owner_or_capable(mnt_userns, inode)) return -EACCES; ret = mnt_want_write_file(filp); @@ -2100,9 +2102,10 @@ err_out: static int f2fs_ioc_start_volatile_write(struct file *filp) { struct inode *inode = file_inode(filp); + struct user_namespace *mnt_userns = file_mnt_user_ns(filp); int ret; - if (!inode_owner_or_capable(&init_user_ns, inode)) + if (!inode_owner_or_capable(mnt_userns, inode)) return -EACCES; if (!S_ISREG(inode->i_mode)) @@ -2135,9 
+2138,10 @@ out: static int f2fs_ioc_release_volatile_write(struct file *filp) { struct inode *inode = file_inode(filp); + struct user_namespace *mnt_userns = file_mnt_user_ns(filp); int ret; - if (!inode_owner_or_capable(&init_user_ns, inode)) + if (!inode_owner_or_capable(mnt_userns, inode)) return -EACCES; ret = mnt_want_write_file(filp); @@ -2164,9 +2168,10 @@ out: static int f2fs_ioc_abort_volatile_write(struct file *filp) { struct inode *inode = file_inode(filp); + struct user_namespace *mnt_userns = file_mnt_user_ns(filp); int ret; - if (!inode_owner_or_capable(&init_user_ns, inode)) + if (!inode_owner_or_capable(mnt_userns, inode)) return -EACCES; ret = mnt_want_write_file(filp); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 0347c5780910a..2b23a76bdae92 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -22,7 +22,8 @@ #include "acl.h" #include -static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) +static struct inode *f2fs_new_inode(struct user_namespace *mnt_userns, + struct inode *dir, umode_t mode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); nid_t ino; @@ -46,7 +47,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) nid_free = true; - inode_init_owner(&init_user_ns, inode, dir, mode); + inode_init_owner(mnt_userns, inode, dir, mode); inode->i_ino = ino; inode->i_blocks = 0; @@ -67,7 +68,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) (F2FS_I(dir)->i_flags & F2FS_PROJINHERIT_FL)) F2FS_I(inode)->i_projid = F2FS_I(dir)->i_projid; else - F2FS_I(inode)->i_projid = make_kprojid(&init_user_ns, + F2FS_I(inode)->i_projid = make_kprojid(mnt_userns, F2FS_DEF_PROJID); err = fscrypt_prepare_new_inode(dir, inode, &encrypt); @@ -349,7 +350,7 @@ static int f2fs_create(struct user_namespace *mnt_userns, struct inode *dir, if (err) return err; - inode = f2fs_new_inode(dir, mode); + inode = f2fs_new_inode(mnt_userns, dir, mode); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -679,7 +680,7 @@ static 
int f2fs_symlink(struct user_namespace *mnt_userns, struct inode *dir, if (err) return err; - inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO); + inode = f2fs_new_inode(mnt_userns, dir, S_IFLNK | S_IRWXUGO); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -750,7 +751,7 @@ static int f2fs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, if (err) return err; - inode = f2fs_new_inode(dir, S_IFDIR | mode); + inode = f2fs_new_inode(mnt_userns, dir, S_IFDIR | mode); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -807,7 +808,7 @@ static int f2fs_mknod(struct user_namespace *mnt_userns, struct inode *dir, if (err) return err; - inode = f2fs_new_inode(dir, mode); + inode = f2fs_new_inode(mnt_userns, dir, mode); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -834,8 +835,9 @@ out: return err; } -static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, - umode_t mode, struct inode **whiteout) +static int __f2fs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, + struct dentry *dentry, umode_t mode, + struct inode **whiteout) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct inode *inode; @@ -845,7 +847,7 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, if (err) return err; - inode = f2fs_new_inode(dir, mode); + inode = f2fs_new_inode(mnt_userns, dir, mode); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -909,20 +911,22 @@ static int f2fs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, if (!f2fs_is_checkpoint_ready(sbi)) return -ENOSPC; - return __f2fs_tmpfile(dir, dentry, mode, NULL); + return __f2fs_tmpfile(mnt_userns, dir, dentry, mode, NULL); } -static int f2fs_create_whiteout(struct inode *dir, struct inode **whiteout) +static int f2fs_create_whiteout(struct user_namespace *mnt_userns, + struct inode *dir, struct inode **whiteout) { if (unlikely(f2fs_cp_error(F2FS_I_SB(dir)))) return -EIO; - return __f2fs_tmpfile(dir, NULL, S_IFCHR | WHITEOUT_MODE, whiteout); + return __f2fs_tmpfile(mnt_userns, dir, NULL, + 
S_IFCHR | WHITEOUT_MODE, whiteout); } -static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry, - unsigned int flags) +static int f2fs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, + struct dentry *old_dentry, struct inode *new_dir, + struct dentry *new_dentry, unsigned int flags) { struct f2fs_sb_info *sbi = F2FS_I_SB(old_dir); struct inode *old_inode = d_inode(old_dentry); @@ -960,7 +964,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, } if (flags & RENAME_WHITEOUT) { - err = f2fs_create_whiteout(old_dir, &whiteout); + err = f2fs_create_whiteout(mnt_userns, old_dir, &whiteout); if (err) return err; } @@ -1300,7 +1304,8 @@ static int f2fs_rename2(struct user_namespace *mnt_userns, * VFS has already handled the new dentry existence case, * here, we just deal with "RENAME_NOREPLACE" as regular rename. */ - return f2fs_rename(old_dir, old_dentry, new_dir, new_dentry, flags); + return f2fs_rename(mnt_userns, old_dir, old_dentry, + new_dir, new_dentry, flags); } static const char *f2fs_encrypted_get_link(struct dentry *dentry, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f816d7d1987d9..22fb4d3b11702 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -4539,7 +4539,7 @@ static struct file_system_type f2fs_fs_type = { .name = "f2fs", .mount = f2fs_mount, .kill_sb = kill_f2fs_super, - .fs_flags = FS_REQUIRES_DEV, + .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, }; MODULE_ALIAS_FS("f2fs"); -- GitLab From 73ab4a3509e6b8f93b87398db2aaabd3c9cbe487 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 18 Aug 2021 08:03:08 -0700 Subject: [PATCH 0359/1586] KVM: x86: Replace memset() "optimization" with normal per-field writes Explicitly zero select fields in the emulator's decode cache instead of zeroing the fields via a gross memset() that spans six fields. 
gcc and clang are both clever enough to batch the first five fields into a single quadword MOV, i.e. memset() and individually zeroing generate identical code. Removing the wart also prepares KVM for FORTIFY_SOURCE performing compile-time and run-time field bounds checking for memset(). No functional change intended. Reported-by: Kees Cook Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/lkml/YR0jIEzEcUom/7rd@google.com Signed-off-by: Kees Cook --- arch/x86/kvm/emulate.c | 9 +++++++-- arch/x86/kvm/kvm_emulate.h | 6 +----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 5719d8cfdbd90..6b820cc2b51b9 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -5380,8 +5380,13 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop) void init_decode_cache(struct x86_emulate_ctxt *ctxt) { - memset(&ctxt->rip_relative, 0, - (void *)&ctxt->modrm - (void *)&ctxt->rip_relative); + /* Clear fields that are set conditionally but read without a guard. 
*/ + ctxt->rip_relative = false; + ctxt->rex_prefix = 0; + ctxt->lock_prefix = 0; + ctxt->rep_prefix = 0; + ctxt->regs_valid = 0; + ctxt->regs_dirty = 0; ctxt->io_read.pos = 0; ctxt->io_read.end = 0; diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h index 39eded2426ffd..840ddb4a93025 100644 --- a/arch/x86/kvm/kvm_emulate.h +++ b/arch/x86/kvm/kvm_emulate.h @@ -336,11 +336,7 @@ struct x86_emulate_ctxt { fastop_t fop; }; int (*check_perm)(struct x86_emulate_ctxt *ctxt); - /* - * The following six fields are cleared together, - * the rest are initialized unconditionally in x86_decode_insn - * or elsewhere - */ + bool rip_relative; u8 rex_prefix; u8 lock_prefix; -- GitLab From 9ed0a59c0cbb663ea09588322498499a937d08fa Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 24 May 2021 21:36:48 -0700 Subject: [PATCH 0360/1586] intel_th: msu: Use memset_startat() for clearing hw header In preparation for FORTIFY_SOURCE performing compile-time and run-time field bounds checking for memset(), avoid intentionally writing across neighboring fields. Use memset_startat() so memset() doesn't get confused about writing beyond the destination member that is intended to be the starting point of zeroing through the end of the struct. 
Acked-by: Alexander Shishkin Link: https://lore.kernel.org/lkml/87sfyzi97l.fsf@ashishki-desk.ger.corp.intel.com Signed-off-by: Kees Cook --- drivers/hwtracing/intel_th/msu.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c index 432ade0842f68..70a07b4e99673 100644 --- a/drivers/hwtracing/intel_th/msu.c +++ b/drivers/hwtracing/intel_th/msu.c @@ -658,13 +658,11 @@ static void msc_buffer_clear_hw_header(struct msc *msc) list_for_each_entry(win, &msc->win_list, entry) { unsigned int blk; - size_t hw_sz = sizeof(struct msc_block_desc) - - offsetof(struct msc_block_desc, hw_tag); for_each_sg(win->sgt->sgl, sg, win->nr_segs, blk) { struct msc_block_desc *bdesc = sg_virt(sg); - memset(&bdesc->hw_tag, 0, hw_sz); + memset_startat(bdesc, 0, hw_tag); } } } -- GitLab From 2a55550fbba643f41318bb12ab20e6510c7df231 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 2 Feb 2022 14:48:34 -0800 Subject: [PATCH 0361/1586] m68k: cmpxchg: Dereference matching size Similar to the recent arm64 fix[1], avoid overly wide casts in the m68k cmpxchg implementation. Avoids this warning under -Warray-bounds with GCC 11: net/sched/cls_tcindex.c: In function 'tcindex_set_parms': ./arch/m68k/include/asm/cmpxchg.h:64:17: warning: array subscript 'volatile struct __xchg_dummy[0]' is partly outside array bounds of 'struct tcf_result[1]' [-Warray-bounds] 64 | __asm__ __volatile__ | ^~~~~~~ net/sched/cls_tcindex.c:338:27: note: while referencing 'cr' 338 | struct tcf_result cr = {}; | ^~ No binary output differences are seen from this change. 
[1] commit 3364c6ce23c6 ("arm64: atomics: lse: Dereference matching size") Cc: "Peter Zijlstra (Intel)" Cc: Mark Rutland Cc: Greg Ungerer Cc: linux-m68k@lists.linux-m68k.org Acked-by: Geert Uytterhoeven Link: https://lore.kernel.org/lkml/CAMuHMdVRrD+2zKoHxAaQdDuiK5JFDanbv0SJ91OdWfx+eyekPQ@mail.gmail.com Signed-off-by: Kees Cook --- arch/m68k/include/asm/cmpxchg.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/m68k/include/asm/cmpxchg.h b/arch/m68k/include/asm/cmpxchg.h index e8ca4b0ccefaa..6cf464cdab067 100644 --- a/arch/m68k/include/asm/cmpxchg.h +++ b/arch/m68k/include/asm/cmpxchg.h @@ -4,8 +4,7 @@ #include -struct __xchg_dummy { unsigned long a[100]; }; -#define __xg(x) ((volatile struct __xchg_dummy *)(x)) +#define __xg(type, x) ((volatile type *)(x)) extern unsigned long __invalid_xchg_size(unsigned long, volatile void *, int); @@ -50,7 +49,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz "1:\n\t" "casb %0,%1,%2\n\t" "jne 1b" - : "=&d" (x) : "d" (x), "m" (*__xg(ptr)) : "memory"); + : "=&d" (x) : "d" (x), "m" (*__xg(u8, ptr)) : "memory"); break; case 2: __asm__ __volatile__ @@ -58,7 +57,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz "1:\n\t" "casw %0,%1,%2\n\t" "jne 1b" - : "=&d" (x) : "d" (x), "m" (*__xg(ptr)) : "memory"); + : "=&d" (x) : "d" (x), "m" (*__xg(u16, ptr)) : "memory"); break; case 4: __asm__ __volatile__ @@ -66,7 +65,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz "1:\n\t" "casl %0,%1,%2\n\t" "jne 1b" - : "=&d" (x) : "d" (x), "m" (*__xg(ptr)) : "memory"); + : "=&d" (x) : "d" (x), "m" (*__xg(u32, ptr)) : "memory"); break; default: x = __invalid_xchg_size(x, ptr, size); -- GitLab From 545c272232caded141abb273745091d6a0d8b14b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 2 Feb 2022 14:13:15 -0800 Subject: [PATCH 0362/1586] alpha: Silence -Warray-bounds warnings GCC 11 (incorrectly[1]) assumes that 
literal values cast to (void *) should be treated like a NULL pointer with an offset, and raises diagnostics when doing bounds checking: In function '__memset', inlined from '__bad_pagetable' at arch/alpha/mm/init.c:79:2: ./arch/alpha/include/asm/string.h:37:32: error: '__builtin_memset' offset [0, 8191] is out of the bounds [0, 0] [-Werror=array-bounds] 37 | return __builtin_memset(s, c, n); | ^~~~~~~~~~~~~~~~~~~~~~~~~ In function '__memset', inlined from '__bad_page' at arch/alpha/mm/init.c:86:2: ./arch/alpha/include/asm/string.h:37:32: error: '__builtin_memset' offset [0, 8191] is out of the bounds [0, 0] [-Werror=array-bounds] 37 | return __builtin_memset(s, c, n); | ^~~~~~~~~~~~~~~~~~~~~~~~~ In function '__memset', inlined from 'paging_init' at arch/alpha/mm/init.c:256:2: ./arch/alpha/include/asm/string.h:37:32: error: '__builtin_memset' offset [0, 8191] is out of the bounds [0, 0] [-Werror=array-bounds] 37 | return __builtin_memset(s, c, n); This has been solved in other places[2] already by using the recently added absolute_pointer() macro. Do the same here. 
[1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99578 [2] https://lore.kernel.org/all/20210912160149.2227137-1-linux@roeck-us.net/ Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Guenter Roeck Cc: linux-alpha@vger.kernel.org Reviewed-and-tested-by: Guenter Roeck Signed-off-by: Kees Cook --- arch/alpha/mm/init.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index f6114d03357c4..7511723b76693 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -76,14 +76,14 @@ pgd_alloc(struct mm_struct *mm) pmd_t * __bad_pagetable(void) { - memset((void *) EMPTY_PGT, 0, PAGE_SIZE); + memset(absolute_pointer(EMPTY_PGT), 0, PAGE_SIZE); return (pmd_t *) EMPTY_PGT; } pte_t __bad_page(void) { - memset((void *) EMPTY_PGE, 0, PAGE_SIZE); + memset(absolute_pointer(EMPTY_PGE), 0, PAGE_SIZE); return pte_mkdirty(mk_pte(virt_to_page(EMPTY_PGE), PAGE_SHARED)); } @@ -253,7 +253,7 @@ void __init paging_init(void) free_area_init(max_zone_pfn); /* Initialize the kernel's ZERO_PGE. */ - memset((void *)ZERO_PGE, 0, PAGE_SIZE); + memset(absolute_pointer(ZERO_PGE), 0, PAGE_SIZE); } #if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_SRM) -- GitLab From 2d2329787ba2e70eae330f1cecd61a9576fe65de Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Sat, 12 Feb 2022 06:31:44 -0800 Subject: [PATCH 0363/1586] regmap: irq: cleanup comments Replace the second 'which' with 'the'. Change 'acknowleding' to 'acknowledging'. 
Signed-off-by: Tom Rix Link: https://lore.kernel.org/r/20220212143144.2648689-1-trix@redhat.com Signed-off-by: Mark Brown --- drivers/base/regmap/regmap-irq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c index d5604f4972967..81f6356f05892 100644 --- a/drivers/base/regmap/regmap-irq.c +++ b/drivers/base/regmap/regmap-irq.c @@ -537,7 +537,7 @@ static irqreturn_t regmap_irq_thread(int irq, void *d) /* * Ignore masked IRQs and ack if we need to; we ack early so - * there is no race between handling and acknowleding the + * there is no race between handling and acknowledging the * interrupt. We assume that typically few of the interrupts * will fire simultaneously so don't worry about overhead from * doing a write per register. @@ -1082,7 +1082,7 @@ EXPORT_SYMBOL_GPL(devm_regmap_add_irq_chip); /** * devm_regmap_del_irq_chip() - Resource managed regmap_del_irq_chip() * - * @dev: Device for which which resource was allocated. + * @dev: Device for which the resource was allocated. * @irq: Primary IRQ for the device. * @data: &regmap_irq_chip_data allocated by regmap_add_irq_chip(). * -- GitLab From a94e5cd8457fb46866459562ef6c53f9dcc375f7 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Fri, 11 Feb 2022 14:24:49 +0100 Subject: [PATCH 0364/1586] regulator: tps62864: Fix bindings for SW property The property is not a pattern, so it should use 'properties' instead of 'patternProperties'. Also, unevaluatedProperties should be set to false like in the other regulator bindings. 
Reported-by: Rob Herring Signed-off-by: Vincent Whitchurch Link: https://lore.kernel.org/r/20220211132449.2899924-1-vincent.whitchurch@axis.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/regulator/ti,tps62864.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/regulator/ti,tps62864.yaml b/Documentation/devicetree/bindings/regulator/ti,tps62864.yaml index e3d739a37ab04..0f29c75f42ea6 100644 --- a/Documentation/devicetree/bindings/regulator/ti,tps62864.yaml +++ b/Documentation/devicetree/bindings/regulator/ti,tps62864.yaml @@ -23,10 +23,11 @@ properties: regulators: type: object - patternProperties: + properties: "SW": type: object $ref: regulator.yaml# + unevaluatedProperties: false additionalProperties: false -- GitLab From 715bea3568e78b80b1d127d8452eac0e3cb6f299 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Fri, 11 Feb 2022 11:31:53 -0300 Subject: [PATCH 0365/1586] spi: amd: Use iopoll for busy waiting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of implementing a custom IO busy wait function, just use readl_poll_timeout(). 
Signed-off-by: André Almeida Link: https://lore.kernel.org/r/20220211143155.75513-2-andrealmeid@collabora.com Signed-off-by: Mark Brown --- drivers/spi/spi-amd.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/spi/spi-amd.c b/drivers/spi/spi-amd.c index 4b3ac7aceaf62..899b8d90ff61b 100644 --- a/drivers/spi/spi-amd.c +++ b/drivers/spi/spi-amd.c @@ -12,6 +12,7 @@ #include #include #include +#include #define AMD_SPI_CTRL0_REG 0x00 #define AMD_SPI_EXEC_CMD BIT(16) @@ -103,16 +104,10 @@ static inline void amd_spi_set_tx_count(struct amd_spi *amd_spi, u8 tx_count) static int amd_spi_busy_wait(struct amd_spi *amd_spi) { - int timeout = 100000; + u32 val; - /* poll for SPI bus to become idle */ - while (amd_spi_readreg32(amd_spi, AMD_SPI_CTRL0_REG) & AMD_SPI_BUSY) { - usleep_range(10, 20); - if (timeout-- < 0) - return -ETIMEDOUT; - } - - return 0; + return readl_poll_timeout(amd_spi->io_remap_addr + AMD_SPI_CTRL0_REG, + val, !(val & AMD_SPI_BUSY), 20, 2000000); } static int amd_spi_execute_opcode(struct amd_spi *amd_spi) -- GitLab From fbc71367288cf1902822618e00c7c1cf6f35348d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Fri, 11 Feb 2022 11:31:54 -0300 Subject: [PATCH 0366/1586] spi: amd: Remove needless rom_addr variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit rom_addr is not used in the code, so we can just drop it from struct amd_spi. 
Signed-off-by: André Almeida Link: https://lore.kernel.org/r/20220211143155.75513-3-andrealmeid@collabora.com Signed-off-by: Mark Brown --- drivers/spi/spi-amd.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/spi/spi-amd.c b/drivers/spi/spi-amd.c index 899b8d90ff61b..417ce14a21c63 100644 --- a/drivers/spi/spi-amd.c +++ b/drivers/spi/spi-amd.c @@ -38,7 +38,6 @@ struct amd_spi { void __iomem *io_remap_addr; unsigned long io_base_addr; - u32 rom_addr; }; static inline u8 amd_spi_readreg8(struct amd_spi *amd_spi, int idx) -- GitLab From 209043554915d7c51ac112a668ad1a255e1bea61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Fri, 11 Feb 2022 11:31:55 -0300 Subject: [PATCH 0367/1586] spi: amd: Add support for version AMDI0062 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for the AMD SPI controller version AMDI0062. Do this in a modular way where it's easy to add new versions. Signed-off-by: André Almeida Link: https://lore.kernel.org/r/20220211143155.75513-4-andrealmeid@collabora.com Signed-off-by: Mark Brown --- drivers/spi/spi-amd.c | 81 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 71 insertions(+), 10 deletions(-) diff --git a/drivers/spi/spi-amd.c b/drivers/spi/spi-amd.c index 417ce14a21c63..d909afac6e219 100644 --- a/drivers/spi/spi-amd.c +++ b/drivers/spi/spi-amd.c @@ -19,6 +19,10 @@ #define AMD_SPI_FIFO_CLEAR BIT(20) #define AMD_SPI_BUSY BIT(31) +#define AMD_SPI_OPCODE_REG 0x45 +#define AMD_SPI_CMD_TRIGGER_REG 0x47 +#define AMD_SPI_TRIGGER_CMD BIT(7) + #define AMD_SPI_OPCODE_MASK 0xFF #define AMD_SPI_ALT_CS_REG 0x1D @@ -35,9 +39,15 @@ #define AMD_SPI_XFER_TX 1 #define AMD_SPI_XFER_RX 2 +enum amd_spi_versions { + AMD_SPI_V1 = 1, /* AMDI0061 */ + AMD_SPI_V2, /* AMDI0062 */ +}; + struct amd_spi { void __iomem *io_remap_addr; unsigned long io_base_addr; + enum amd_spi_versions version; }; static inline u8 amd_spi_readreg8(struct amd_spi *amd_spi, int idx) @@ -81,14 
+91,29 @@ static void amd_spi_select_chip(struct amd_spi *amd_spi, u8 cs) amd_spi_setclear_reg8(amd_spi, AMD_SPI_ALT_CS_REG, cs, AMD_SPI_ALT_CS_MASK); } +static inline void amd_spi_clear_chip(struct amd_spi *amd_spi, u8 chip_select) +{ + amd_spi_writereg8(amd_spi, AMD_SPI_ALT_CS_REG, chip_select & ~AMD_SPI_ALT_CS_MASK); +} + static void amd_spi_clear_fifo_ptr(struct amd_spi *amd_spi) { amd_spi_setclear_reg32(amd_spi, AMD_SPI_CTRL0_REG, AMD_SPI_FIFO_CLEAR, AMD_SPI_FIFO_CLEAR); } -static void amd_spi_set_opcode(struct amd_spi *amd_spi, u8 cmd_opcode) +static int amd_spi_set_opcode(struct amd_spi *amd_spi, u8 cmd_opcode) { - amd_spi_setclear_reg32(amd_spi, AMD_SPI_CTRL0_REG, cmd_opcode, AMD_SPI_OPCODE_MASK); + switch (amd_spi->version) { + case AMD_SPI_V1: + amd_spi_setclear_reg32(amd_spi, AMD_SPI_CTRL0_REG, cmd_opcode, + AMD_SPI_OPCODE_MASK); + return 0; + case AMD_SPI_V2: + amd_spi_writereg8(amd_spi, AMD_SPI_OPCODE_REG, cmd_opcode); + return 0; + default: + return -ENODEV; + } } static inline void amd_spi_set_rx_count(struct amd_spi *amd_spi, u8 rx_count) @@ -104,9 +129,21 @@ static inline void amd_spi_set_tx_count(struct amd_spi *amd_spi, u8 tx_count) static int amd_spi_busy_wait(struct amd_spi *amd_spi) { u32 val; + int reg; + + switch (amd_spi->version) { + case AMD_SPI_V1: + reg = AMD_SPI_CTRL0_REG; + break; + case AMD_SPI_V2: + reg = AMD_SPI_STATUS_REG; + break; + default: + return -ENODEV; + } - return readl_poll_timeout(amd_spi->io_remap_addr + AMD_SPI_CTRL0_REG, - val, !(val & AMD_SPI_BUSY), 20, 2000000); + return readl_poll_timeout(amd_spi->io_remap_addr + reg, val, + !(val & AMD_SPI_BUSY), 20, 2000000); } static int amd_spi_execute_opcode(struct amd_spi *amd_spi) @@ -117,10 +154,20 @@ static int amd_spi_execute_opcode(struct amd_spi *amd_spi) if (ret) return ret; - /* Set ExecuteOpCode bit in the CTRL0 register */ - amd_spi_setclear_reg32(amd_spi, AMD_SPI_CTRL0_REG, AMD_SPI_EXEC_CMD, AMD_SPI_EXEC_CMD); - - return 0; + switch (amd_spi->version) { + case 
AMD_SPI_V1: + /* Set ExecuteOpCode bit in the CTRL0 register */ + amd_spi_setclear_reg32(amd_spi, AMD_SPI_CTRL0_REG, AMD_SPI_EXEC_CMD, + AMD_SPI_EXEC_CMD); + return 0; + case AMD_SPI_V2: + /* Trigger the command execution */ + amd_spi_setclear_reg8(amd_spi, AMD_SPI_CMD_TRIGGER_REG, + AMD_SPI_TRIGGER_CMD, AMD_SPI_TRIGGER_CMD); + return 0; + default: + return -ENODEV; + } } static int amd_spi_master_setup(struct spi_device *spi) @@ -190,6 +237,17 @@ static inline int amd_spi_fifo_xfer(struct amd_spi *amd_spi, message->actual_length = tx_len + rx_len + 1; /* complete the transaction */ message->status = 0; + + switch (amd_spi->version) { + case AMD_SPI_V1: + break; + case AMD_SPI_V2: + amd_spi_clear_chip(amd_spi, message->spi->chip_select); + break; + default: + return -ENODEV; + } + spi_finalize_current_message(master); return 0; @@ -235,6 +293,8 @@ static int amd_spi_probe(struct platform_device *pdev) } dev_dbg(dev, "io_remap_address: %p\n", amd_spi->io_remap_addr); + amd_spi->version = (enum amd_spi_versions) device_get_match_data(dev); + /* Initialize the spi_master fields */ master->bus_num = 0; master->num_chipselect = 4; @@ -260,7 +320,8 @@ err_free_master: #ifdef CONFIG_ACPI static const struct acpi_device_id spi_acpi_match[] = { - { "AMDI0061", 0 }, + { "AMDI0061", AMD_SPI_V1 }, + { "AMDI0062", AMD_SPI_V2 }, {}, }; MODULE_DEVICE_TABLE(acpi, spi_acpi_match); @@ -269,7 +330,7 @@ MODULE_DEVICE_TABLE(acpi, spi_acpi_match); static struct platform_driver amd_spi_driver = { .driver = { .name = "amd_spi", - .acpi_match_table = ACPI_PTR(spi_acpi_match), + .acpi_match_table = spi_acpi_match, }, .probe = amd_spi_probe, }; -- GitLab From cd149eff8d2201a63c074a6d9d03e52926aa535d Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 9 Feb 2022 15:27:04 +0300 Subject: [PATCH 0368/1586] mtd: spi-nor: intel-spi: Disable write protection only if asked Currently the driver tries to disable the BIOS write protection automatically even if this is not what the user wants. 
For this reason modify the driver so that by default it does not touch the write protection. Only if specifically asked by the user (setting writeable=1 command line parameter) the driver tries to disable the BIOS write protection. Signed-off-by: Mika Westerberg Reviewed-by: Andy Shevchenko Reviewed-by: Mauro Lima Reviewed-by: Tudor Ambarus Acked-by: Lee Jones Link: https://lore.kernel.org/r/20220209122706.42439-2-mika.westerberg@linux.intel.com Signed-off-by: Mark Brown --- drivers/mfd/lpc_ich.c | 59 +++++++++++++++++-- .../mtd/spi-nor/controllers/intel-spi-pci.c | 29 +++++---- drivers/mtd/spi-nor/controllers/intel-spi.c | 41 ++++++------- include/linux/platform_data/x86/intel-spi.h | 6 +- 4 files changed, 96 insertions(+), 39 deletions(-) diff --git a/drivers/mfd/lpc_ich.c b/drivers/mfd/lpc_ich.c index f10e53187f67a..9ffab9aafd81b 100644 --- a/drivers/mfd/lpc_ich.c +++ b/drivers/mfd/lpc_ich.c @@ -63,6 +63,8 @@ #define SPIBASE_BYT 0x54 #define SPIBASE_BYT_SZ 512 #define SPIBASE_BYT_EN BIT(1) +#define BYT_BCR 0xfc +#define BYT_BCR_WPD BIT(0) #define SPIBASE_LPT 0x3800 #define SPIBASE_LPT_SZ 512 @@ -1084,12 +1086,57 @@ wdt_done: return ret; } +static bool lpc_ich_byt_set_writeable(void __iomem *base, void *data) +{ + u32 val; + + val = readl(base + BYT_BCR); + if (!(val & BYT_BCR_WPD)) { + val |= BYT_BCR_WPD; + writel(val, base + BYT_BCR); + val = readl(base + BYT_BCR); + } + + return val & BYT_BCR_WPD; +} + +static bool lpc_ich_lpt_set_writeable(void __iomem *base, void *data) +{ + struct pci_dev *pdev = data; + u32 bcr; + + pci_read_config_dword(pdev, BCR, &bcr); + if (!(bcr & BCR_WPD)) { + bcr |= BCR_WPD; + pci_write_config_dword(pdev, BCR, bcr); + pci_read_config_dword(pdev, BCR, &bcr); + } + + return bcr & BCR_WPD; +} + +static bool lpc_ich_bxt_set_writeable(void __iomem *base, void *data) +{ + unsigned int spi = PCI_DEVFN(13, 2); + struct pci_bus *bus = data; + u32 bcr; + + pci_bus_read_config_dword(bus, spi, BCR, &bcr); + if (!(bcr & BCR_WPD)) { + bcr |= 
BCR_WPD; + pci_bus_write_config_dword(bus, spi, BCR, bcr); + pci_bus_read_config_dword(bus, spi, BCR, &bcr); + } + + return bcr & BCR_WPD; +} + static int lpc_ich_init_spi(struct pci_dev *dev) { struct lpc_ich_priv *priv = pci_get_drvdata(dev); struct resource *res = &intel_spi_res[0]; struct intel_spi_boardinfo *info; - u32 spi_base, rcba, bcr; + u32 spi_base, rcba; info = devm_kzalloc(&dev->dev, sizeof(*info), GFP_KERNEL); if (!info) @@ -1103,6 +1150,8 @@ static int lpc_ich_init_spi(struct pci_dev *dev) if (spi_base & SPIBASE_BYT_EN) { res->start = spi_base & ~(SPIBASE_BYT_SZ - 1); res->end = res->start + SPIBASE_BYT_SZ - 1; + + info->set_writeable = lpc_ich_byt_set_writeable; } break; @@ -1113,8 +1162,8 @@ static int lpc_ich_init_spi(struct pci_dev *dev) res->start = spi_base + SPIBASE_LPT; res->end = res->start + SPIBASE_LPT_SZ - 1; - pci_read_config_dword(dev, BCR, &bcr); - info->writeable = !!(bcr & BCR_WPD); + info->set_writeable = lpc_ich_lpt_set_writeable; + info->data = dev; } break; @@ -1135,8 +1184,8 @@ static int lpc_ich_init_spi(struct pci_dev *dev) res->start = spi_base & 0xfffffff0; res->end = res->start + SPIBASE_APL_SZ - 1; - pci_bus_read_config_dword(bus, spi, BCR, &bcr); - info->writeable = !!(bcr & BCR_WPD); + info->set_writeable = lpc_ich_bxt_set_writeable; + info->data = bus; } pci_bus_write_config_byte(bus, p2sb, 0xe1, 0x1); diff --git a/drivers/mtd/spi-nor/controllers/intel-spi-pci.c b/drivers/mtd/spi-nor/controllers/intel-spi-pci.c index 1bc53b8bb88a9..508f7ca098eff 100644 --- a/drivers/mtd/spi-nor/controllers/intel-spi-pci.c +++ b/drivers/mtd/spi-nor/controllers/intel-spi-pci.c @@ -16,12 +16,30 @@ #define BCR 0xdc #define BCR_WPD BIT(0) +static bool intel_spi_pci_set_writeable(void __iomem *base, void *data) +{ + struct pci_dev *pdev = data; + u32 bcr; + + /* Try to make the chip read/write */ + pci_read_config_dword(pdev, BCR, &bcr); + if (!(bcr & BCR_WPD)) { + bcr |= BCR_WPD; + pci_write_config_dword(pdev, BCR, bcr); + 
pci_read_config_dword(pdev, BCR, &bcr); + } + + return bcr & BCR_WPD; +} + static const struct intel_spi_boardinfo bxt_info = { .type = INTEL_SPI_BXT, + .set_writeable = intel_spi_pci_set_writeable, }; static const struct intel_spi_boardinfo cnl_info = { .type = INTEL_SPI_CNL, + .set_writeable = intel_spi_pci_set_writeable, }; static int intel_spi_pci_probe(struct pci_dev *pdev, @@ -29,7 +47,6 @@ static int intel_spi_pci_probe(struct pci_dev *pdev, { struct intel_spi_boardinfo *info; struct intel_spi *ispi; - u32 bcr; int ret; ret = pcim_enable_device(pdev); @@ -41,15 +58,7 @@ static int intel_spi_pci_probe(struct pci_dev *pdev, if (!info) return -ENOMEM; - /* Try to make the chip read/write */ - pci_read_config_dword(pdev, BCR, &bcr); - if (!(bcr & BCR_WPD)) { - bcr |= BCR_WPD; - pci_write_config_dword(pdev, BCR, bcr); - pci_read_config_dword(pdev, BCR, &bcr); - } - info->writeable = !!(bcr & BCR_WPD); - + info->data = pdev; ispi = intel_spi_probe(&pdev->dev, &pdev->resource[0], info); if (IS_ERR(ispi)) return PTR_ERR(ispi); diff --git a/drivers/mtd/spi-nor/controllers/intel-spi.c b/drivers/mtd/spi-nor/controllers/intel-spi.c index a413892ff449f..f35597cbea0ca 100644 --- a/drivers/mtd/spi-nor/controllers/intel-spi.c +++ b/drivers/mtd/spi-nor/controllers/intel-spi.c @@ -131,7 +131,6 @@ * @sregs: Start of software sequencer registers * @nregions: Maximum number of regions * @pr_num: Maximum number of protected range registers - * @writeable: Is the chip writeable * @locked: Is SPI setting locked * @swseq_reg: Use SW sequencer in register reads/writes * @swseq_erase: Use SW sequencer in erase operation @@ -149,7 +148,6 @@ struct intel_spi { void __iomem *sregs; size_t nregions; size_t pr_num; - bool writeable; bool locked; bool swseq_reg; bool swseq_erase; @@ -304,6 +302,14 @@ static int intel_spi_wait_sw_busy(struct intel_spi *ispi) INTEL_SPI_TIMEOUT * 1000); } +static bool intel_spi_set_writeable(struct intel_spi *ispi) +{ + if (!ispi->info->set_writeable) + return 
false; + + return ispi->info->set_writeable(ispi->base, ispi->info->data); +} + static int intel_spi_init(struct intel_spi *ispi) { u32 opmenu0, opmenu1, lvscc, uvscc, val; @@ -316,19 +322,6 @@ static int intel_spi_init(struct intel_spi *ispi) ispi->nregions = BYT_FREG_NUM; ispi->pr_num = BYT_PR_NUM; ispi->swseq_reg = true; - - if (writeable) { - /* Disable write protection */ - val = readl(ispi->base + BYT_BCR); - if (!(val & BYT_BCR_WPD)) { - val |= BYT_BCR_WPD; - writel(val, ispi->base + BYT_BCR); - val = readl(ispi->base + BYT_BCR); - } - - ispi->writeable = !!(val & BYT_BCR_WPD); - } - break; case INTEL_SPI_LPT: @@ -358,6 +351,12 @@ static int intel_spi_init(struct intel_spi *ispi) return -EINVAL; } + /* Try to disable write protection if user asked to do so */ + if (writeable && !intel_spi_set_writeable(ispi)) { + dev_warn(ispi->dev, "can't disable chip write protection\n"); + writeable = false; + } + /* Disable #SMI generation from HW sequencer */ val = readl(ispi->base + HSFSTS_CTL); val &= ~HSFSTS_CTL_FSMIE; @@ -884,9 +883,12 @@ static void intel_spi_fill_partition(struct intel_spi *ispi, /* * If any of the regions have protection bits set, make the * whole partition read-only to be on the safe side. + * + * Also if the user did not ask the chip to be writeable + * mask the bit too. 
*/ - if (intel_spi_is_protected(ispi, base, limit)) - ispi->writeable = false; + if (!writeable || intel_spi_is_protected(ispi, base, limit)) + part->mask_flags |= MTD_WRITEABLE; end = (limit << 12) + 4096; if (end > part->size) @@ -927,7 +929,6 @@ struct intel_spi *intel_spi_probe(struct device *dev, ispi->dev = dev; ispi->info = info; - ispi->writeable = info->writeable; ret = intel_spi_init(ispi); if (ret) @@ -945,10 +946,6 @@ struct intel_spi *intel_spi_probe(struct device *dev, intel_spi_fill_partition(ispi, &part); - /* Prevent writes if not explicitly enabled */ - if (!ispi->writeable || !writeable) - ispi->nor.mtd.flags &= ~MTD_WRITEABLE; - ret = mtd_device_register(&ispi->nor.mtd, &part, 1); if (ret) return ERR_PTR(ret); diff --git a/include/linux/platform_data/x86/intel-spi.h b/include/linux/platform_data/x86/intel-spi.h index 7f53a5c6f35e8..7dda3f6904654 100644 --- a/include/linux/platform_data/x86/intel-spi.h +++ b/include/linux/platform_data/x86/intel-spi.h @@ -19,11 +19,13 @@ enum intel_spi_type { /** * struct intel_spi_boardinfo - Board specific data for Intel SPI driver * @type: Type which this controller is compatible with - * @writeable: The chip is writeable + * @set_writeable: Try to make the chip writeable (optional) + * @data: Data to be passed to @set_writeable can be %NULL */ struct intel_spi_boardinfo { enum intel_spi_type type; - bool writeable; + bool (*set_writeable)(void __iomem *base, void *data); + void *data; }; #endif /* INTEL_SPI_PDATA_H */ -- GitLab From e23e5a05d1fd9479586c40ffbcc056b3e34ef816 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 9 Feb 2022 15:27:05 +0300 Subject: [PATCH 0369/1586] mtd: spi-nor: intel-spi: Convert to SPI MEM The preferred way to implement SPI-NOR controller drivers is through SPI subsystem utilizing the SPI MEM core functions.
This converts the Intel SPI flash controller driver over the SPI MEM by moving the driver from SPI-NOR subsystem to SPI subsystem and in one go make it use the SPI MEM functions. The driver name will be changed from intel-spi to spi-intel to match the convention used in the SPI subsystem. Signed-off-by: Mika Westerberg Reviewed-by: Andy Shevchenko Reviewed-by: Mauro Lima Reviewed-by: Boris Brezillon Acked-by: Lee Jones Acked-by: Pratyush Yadav Reviewed-by: Tudor Ambarus Link: https://lore.kernel.org/r/20220209122706.42439-3-mika.westerberg@linux.intel.com Signed-off-by: Mark Brown --- drivers/mtd/spi-nor/controllers/Kconfig | 36 - drivers/mtd/spi-nor/controllers/Makefile | 3 - drivers/mtd/spi-nor/controllers/intel-spi.h | 21 - drivers/spi/Kconfig | 39 + drivers/spi/Makefile | 3 + .../intel-spi-pci.c => spi/spi-intel-pci.c} | 20 +- .../spi-intel-platform.c} | 21 +- .../intel-spi.c => spi/spi-intel.c} | 829 ++++++++++++------ drivers/spi/spi-intel.h | 19 + include/linux/mfd/lpc_ich.h | 2 +- .../x86/{intel-spi.h => spi-intel.h} | 6 +- 11 files changed, 628 insertions(+), 371 deletions(-) delete mode 100644 drivers/mtd/spi-nor/controllers/intel-spi.h rename drivers/{mtd/spi-nor/controllers/intel-spi-pci.c => spi/spi-intel-pci.c} (86%) rename drivers/{mtd/spi-nor/controllers/intel-spi-platform.c => spi/spi-intel-platform.c} (65%) rename drivers/{mtd/spi-nor/controllers/intel-spi.c => spi/spi-intel.c} (57%) create mode 100644 drivers/spi/spi-intel.h rename include/linux/platform_data/x86/{intel-spi.h => spi-intel.h} (89%) diff --git a/drivers/mtd/spi-nor/controllers/Kconfig b/drivers/mtd/spi-nor/controllers/Kconfig index 5c0e0ec2e6d1f..50f4f3484d426 100644 --- a/drivers/mtd/spi-nor/controllers/Kconfig +++ b/drivers/mtd/spi-nor/controllers/Kconfig @@ -26,39 +26,3 @@ config SPI_NXP_SPIFI SPIFI is a specialized controller for connecting serial SPI Flash. Enable this option if you have a device with a SPIFI controller and want to access the Flash as a mtd device. 
- -config SPI_INTEL_SPI - tristate - -config SPI_INTEL_SPI_PCI - tristate "Intel PCH/PCU SPI flash PCI driver (DANGEROUS)" - depends on X86 && PCI - select SPI_INTEL_SPI - help - This enables PCI support for the Intel PCH/PCU SPI controller in - master mode. This controller is present in modern Intel hardware - and is used to hold BIOS and other persistent settings. Using - this driver it is possible to upgrade BIOS directly from Linux. - - Say N here unless you know what you are doing. Overwriting the - SPI flash may render the system unbootable. - - To compile this driver as a module, choose M here: the module - will be called intel-spi-pci. - -config SPI_INTEL_SPI_PLATFORM - tristate "Intel PCH/PCU SPI flash platform driver (DANGEROUS)" - depends on X86 - select SPI_INTEL_SPI - help - This enables platform support for the Intel PCH/PCU SPI - controller in master mode. This controller is present in modern - Intel hardware and is used to hold BIOS and other persistent - settings. Using this driver it is possible to upgrade BIOS - directly from Linux. - - Say N here unless you know what you are doing. Overwriting the - SPI flash may render the system unbootable. - - To compile this driver as a module, choose M here: the module - will be called intel-spi-platform. 
diff --git a/drivers/mtd/spi-nor/controllers/Makefile b/drivers/mtd/spi-nor/controllers/Makefile index e7abba491d983..6e2a1dc684662 100644 --- a/drivers/mtd/spi-nor/controllers/Makefile +++ b/drivers/mtd/spi-nor/controllers/Makefile @@ -2,6 +2,3 @@ obj-$(CONFIG_SPI_ASPEED_SMC) += aspeed-smc.o obj-$(CONFIG_SPI_HISI_SFC) += hisi-sfc.o obj-$(CONFIG_SPI_NXP_SPIFI) += nxp-spifi.o -obj-$(CONFIG_SPI_INTEL_SPI) += intel-spi.o -obj-$(CONFIG_SPI_INTEL_SPI_PCI) += intel-spi-pci.o -obj-$(CONFIG_SPI_INTEL_SPI_PLATFORM) += intel-spi-platform.o diff --git a/drivers/mtd/spi-nor/controllers/intel-spi.h b/drivers/mtd/spi-nor/controllers/intel-spi.h deleted file mode 100644 index f2871179fd34d..0000000000000 --- a/drivers/mtd/spi-nor/controllers/intel-spi.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Intel PCH/PCU SPI flash driver. - * - * Copyright (C) 2016, Intel Corporation - * Author: Mika Westerberg - */ - -#ifndef INTEL_SPI_H -#define INTEL_SPI_H - -#include - -struct intel_spi; -struct resource; - -struct intel_spi *intel_spi_probe(struct device *dev, - struct resource *mem, const struct intel_spi_boardinfo *info); -int intel_spi_remove(struct intel_spi *ispi); - -#endif /* INTEL_SPI_H */ diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index b2a8821971e1d..0201257511fbe 100644 --- a/drivers/spi/Kconfig +++ b/drivers/spi/Kconfig @@ -427,6 +427,45 @@ config SPI_INGENIC To compile this driver as a module, choose M here: the module will be called spi-ingenic. +config SPI_INTEL + tristate + +config SPI_INTEL_PCI + tristate "Intel PCH/PCU SPI flash PCI driver (DANGEROUS)" + depends on PCI + depends on X86 || COMPILE_TEST + depends on SPI_MEM + select SPI_INTEL + help + This enables PCI support for the Intel PCH/PCU SPI controller in + master mode. This controller is present in modern Intel hardware + and is used to hold BIOS and other persistent settings. Using + this driver it is possible to upgrade BIOS directly from Linux. 
+ + Say N here unless you know what you are doing. Overwriting the + SPI flash may render the system unbootable. + + To compile this driver as a module, choose M here: the module + will be called spi-intel-pci. + +config SPI_INTEL_PLATFORM + tristate "Intel PCH/PCU SPI flash platform driver (DANGEROUS)" + depends on X86 || COMPILE_TEST + depends on SPI_MEM + select SPI_INTEL + help + This enables platform support for the Intel PCH/PCU SPI + controller in master mode. This controller is present in modern + Intel hardware and is used to hold BIOS and other persistent + settings. Using this driver it is possible to upgrade BIOS + directly from Linux. + + Say N here unless you know what you are doing. Overwriting the + SPI flash may render the system unbootable. + + To compile this driver as a module, choose M here: the module + will be called spi-intel-platform. + config SPI_JCORE tristate "J-Core SPI Master" depends on OF && (SUPERH || COMPILE_TEST) diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile index dd7393a6046fa..36b2045f08d25 100644 --- a/drivers/spi/Makefile +++ b/drivers/spi/Makefile @@ -61,6 +61,9 @@ obj-$(CONFIG_SPI_HISI_SFC_V3XX) += spi-hisi-sfc-v3xx.o obj-$(CONFIG_SPI_IMG_SPFI) += spi-img-spfi.o obj-$(CONFIG_SPI_IMX) += spi-imx.o obj-$(CONFIG_SPI_INGENIC) += spi-ingenic.o +obj-$(CONFIG_SPI_INTEL) += spi-intel.o +obj-$(CONFIG_SPI_INTEL_PCI) += spi-intel-pci.o +obj-$(CONFIG_SPI_INTEL_PLATFORM) += spi-intel-platform.o obj-$(CONFIG_SPI_LANTIQ_SSC) += spi-lantiq-ssc.o obj-$(CONFIG_SPI_JCORE) += spi-jcore.o obj-$(CONFIG_SPI_LM70_LLP) += spi-lm70llp.o diff --git a/drivers/mtd/spi-nor/controllers/intel-spi-pci.c b/drivers/spi/spi-intel-pci.c similarity index 86% rename from drivers/mtd/spi-nor/controllers/intel-spi-pci.c rename to drivers/spi/spi-intel-pci.c index 508f7ca098eff..a9cb4d77ffe39 100644 --- a/drivers/mtd/spi-nor/controllers/intel-spi-pci.c +++ b/drivers/spi/spi-intel-pci.c @@ -2,16 +2,14 @@ /* * Intel PCH/PCU SPI flash PCI driver. 
* - * Copyright (C) 2016, Intel Corporation + * Copyright (C) 2016 - 2022, Intel Corporation * Author: Mika Westerberg */ -#include -#include #include #include -#include "intel-spi.h" +#include "spi-intel.h" #define BCR 0xdc #define BCR_WPD BIT(0) @@ -46,7 +44,6 @@ static int intel_spi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct intel_spi_boardinfo *info; - struct intel_spi *ispi; int ret; ret = pcim_enable_device(pdev); @@ -59,17 +56,7 @@ static int intel_spi_pci_probe(struct pci_dev *pdev, return -ENOMEM; info->data = pdev; - ispi = intel_spi_probe(&pdev->dev, &pdev->resource[0], info); - if (IS_ERR(ispi)) - return PTR_ERR(ispi); - - pci_set_drvdata(pdev, ispi); - return 0; -} - -static void intel_spi_pci_remove(struct pci_dev *pdev) -{ - intel_spi_remove(pci_get_drvdata(pdev)); + return intel_spi_probe(&pdev->dev, &pdev->resource[0], info); } static const struct pci_device_id intel_spi_pci_ids[] = { @@ -98,7 +85,6 @@ static struct pci_driver intel_spi_pci_driver = { .name = "intel-spi", .id_table = intel_spi_pci_ids, .probe = intel_spi_pci_probe, - .remove = intel_spi_pci_remove, }; module_pci_driver(intel_spi_pci_driver); diff --git a/drivers/mtd/spi-nor/controllers/intel-spi-platform.c b/drivers/spi/spi-intel-platform.c similarity index 65% rename from drivers/mtd/spi-nor/controllers/intel-spi-platform.c rename to drivers/spi/spi-intel-platform.c index f80f1086f928c..2ef09fa356612 100644 --- a/drivers/mtd/spi-nor/controllers/intel-spi-platform.c +++ b/drivers/spi/spi-intel-platform.c @@ -2,20 +2,18 @@ /* * Intel PCH/PCU SPI flash platform driver. 
* - * Copyright (C) 2016, Intel Corporation + * Copyright (C) 2016 - 2022, Intel Corporation * Author: Mika Westerberg */ -#include #include #include -#include "intel-spi.h" +#include "spi-intel.h" static int intel_spi_platform_probe(struct platform_device *pdev) { struct intel_spi_boardinfo *info; - struct intel_spi *ispi; struct resource *mem; info = dev_get_platdata(&pdev->dev); @@ -23,24 +21,11 @@ static int intel_spi_platform_probe(struct platform_device *pdev) return -EINVAL; mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); - ispi = intel_spi_probe(&pdev->dev, mem, info); - if (IS_ERR(ispi)) - return PTR_ERR(ispi); - - platform_set_drvdata(pdev, ispi); - return 0; -} - -static int intel_spi_platform_remove(struct platform_device *pdev) -{ - struct intel_spi *ispi = platform_get_drvdata(pdev); - - return intel_spi_remove(ispi); + return intel_spi_probe(&pdev->dev, mem, info); } static struct platform_driver intel_spi_platform_driver = { .probe = intel_spi_platform_probe, - .remove = intel_spi_platform_remove, .driver = { .name = "intel-spi", }, diff --git a/drivers/mtd/spi-nor/controllers/intel-spi.c b/drivers/spi/spi-intel.c similarity index 57% rename from drivers/mtd/spi-nor/controllers/intel-spi.c rename to drivers/spi/spi-intel.c index f35597cbea0ca..e937cfe85559b 100644 --- a/drivers/mtd/spi-nor/controllers/intel-spi.c +++ b/drivers/spi/spi-intel.c @@ -2,21 +2,21 @@ /* * Intel PCH/PCU SPI flash driver. 
* - * Copyright (C) 2016, Intel Corporation + * Copyright (C) 2016 - 2022, Intel Corporation * Author: Mika Westerberg */ -#include -#include #include #include -#include -#include -#include + #include #include -#include "intel-spi.h" +#include +#include +#include + +#include "spi-intel.h" /* Offsets are from @ispi->base */ #define BFPREG 0x00 @@ -92,8 +92,6 @@ /* CPU specifics */ #define BYT_PR 0x74 #define BYT_SSFSTS_CTL 0x90 -#define BYT_BCR 0xfc -#define BYT_BCR_WPD BIT(0) #define BYT_FREG_NUM 5 #define BYT_PR_NUM 5 @@ -125,35 +123,43 @@ * struct intel_spi - Driver private data * @dev: Device pointer * @info: Pointer to board specific info - * @nor: SPI NOR layer structure * @base: Beginning of MMIO space * @pregs: Start of protection registers * @sregs: Start of software sequencer registers + * @master: Pointer to the SPI controller structure * @nregions: Maximum number of regions * @pr_num: Maximum number of protected range registers * @locked: Is SPI setting locked * @swseq_reg: Use SW sequencer in register reads/writes * @swseq_erase: Use SW sequencer in erase operation - * @erase_64k: 64k erase supported * @atomic_preopcode: Holds preopcode when atomic sequence is requested * @opcodes: Opcodes which are supported. This are programmed by BIOS * before it locks down the controller. 
+ * @mem_ops: Pointer to SPI MEM ops supported by the controller */ struct intel_spi { struct device *dev; const struct intel_spi_boardinfo *info; - struct spi_nor nor; void __iomem *base; void __iomem *pregs; void __iomem *sregs; + struct spi_controller *master; size_t nregions; size_t pr_num; bool locked; bool swseq_reg; bool swseq_erase; - bool erase_64k; u8 atomic_preopcode; u8 opcodes[8]; + const struct intel_spi_mem_op *mem_ops; +}; + +struct intel_spi_mem_op { + struct spi_mem_op mem_op; + u32 replacement_op; + int (*exec_op)(struct intel_spi *ispi, + const struct intel_spi_mem_op *iop, + const struct spi_mem_op *op); }; static bool writeable; @@ -199,9 +205,6 @@ static void intel_spi_dump_regs(struct intel_spi *ispi) readl(ispi->sregs + OPMENU1)); } - if (ispi->info->type == INTEL_SPI_BYT) - dev_dbg(ispi->dev, "BCR=0x%08x\n", readl(ispi->base + BYT_BCR)); - dev_dbg(ispi->dev, "LVSCC=0x%08x\n", readl(ispi->base + LVSCC)); dev_dbg(ispi->dev, "UVSCC=0x%08x\n", readl(ispi->base + UVSCC)); @@ -217,9 +220,8 @@ static void intel_spi_dump_regs(struct intel_spi *ispi) base = value & PR_BASE_MASK; dev_dbg(ispi->dev, " %02d base: 0x%08x limit: 0x%08x [%c%c]\n", - i, base << 12, (limit << 12) | 0xfff, - value & PR_WPE ? 'W' : '.', - value & PR_RPE ? 'R' : '.'); + i, base << 12, (limit << 12) | 0xfff, + value & PR_WPE ? 'W' : '.', value & PR_RPE ? 
'R' : '.'); } dev_dbg(ispi->dev, "Flash regions:\n"); @@ -234,7 +236,7 @@ static void intel_spi_dump_regs(struct intel_spi *ispi) dev_dbg(ispi->dev, " %02d disabled\n", i); else dev_dbg(ispi->dev, " %02d base: 0x%08x limit: 0x%08x\n", - i, base << 12, (limit << 12) | 0xfff); + i, base << 12, (limit << 12) | 0xfff); } dev_dbg(ispi->dev, "Using %cW sequencer for register access\n", @@ -310,119 +312,6 @@ static bool intel_spi_set_writeable(struct intel_spi *ispi) return ispi->info->set_writeable(ispi->base, ispi->info->data); } -static int intel_spi_init(struct intel_spi *ispi) -{ - u32 opmenu0, opmenu1, lvscc, uvscc, val; - int i; - - switch (ispi->info->type) { - case INTEL_SPI_BYT: - ispi->sregs = ispi->base + BYT_SSFSTS_CTL; - ispi->pregs = ispi->base + BYT_PR; - ispi->nregions = BYT_FREG_NUM; - ispi->pr_num = BYT_PR_NUM; - ispi->swseq_reg = true; - break; - - case INTEL_SPI_LPT: - ispi->sregs = ispi->base + LPT_SSFSTS_CTL; - ispi->pregs = ispi->base + LPT_PR; - ispi->nregions = LPT_FREG_NUM; - ispi->pr_num = LPT_PR_NUM; - ispi->swseq_reg = true; - break; - - case INTEL_SPI_BXT: - ispi->sregs = ispi->base + BXT_SSFSTS_CTL; - ispi->pregs = ispi->base + BXT_PR; - ispi->nregions = BXT_FREG_NUM; - ispi->pr_num = BXT_PR_NUM; - ispi->erase_64k = true; - break; - - case INTEL_SPI_CNL: - ispi->sregs = NULL; - ispi->pregs = ispi->base + CNL_PR; - ispi->nregions = CNL_FREG_NUM; - ispi->pr_num = CNL_PR_NUM; - break; - - default: - return -EINVAL; - } - - /* Try to disable write protection if user asked to do so */ - if (writeable && !intel_spi_set_writeable(ispi)) { - dev_warn(ispi->dev, "can't disable chip write protection\n"); - writeable = false; - } - - /* Disable #SMI generation from HW sequencer */ - val = readl(ispi->base + HSFSTS_CTL); - val &= ~HSFSTS_CTL_FSMIE; - writel(val, ispi->base + HSFSTS_CTL); - - /* - * Determine whether erase operation should use HW or SW sequencer. 
- * - * The HW sequencer has a predefined list of opcodes, with only the - * erase opcode being programmable in LVSCC and UVSCC registers. - * If these registers don't contain a valid erase opcode, erase - * cannot be done using HW sequencer. - */ - lvscc = readl(ispi->base + LVSCC); - uvscc = readl(ispi->base + UVSCC); - if (!(lvscc & ERASE_OPCODE_MASK) || !(uvscc & ERASE_OPCODE_MASK)) - ispi->swseq_erase = true; - /* SPI controller on Intel BXT supports 64K erase opcode */ - if (ispi->info->type == INTEL_SPI_BXT && !ispi->swseq_erase) - if (!(lvscc & ERASE_64K_OPCODE_MASK) || - !(uvscc & ERASE_64K_OPCODE_MASK)) - ispi->erase_64k = false; - - if (ispi->sregs == NULL && (ispi->swseq_reg || ispi->swseq_erase)) { - dev_err(ispi->dev, "software sequencer not supported, but required\n"); - return -EINVAL; - } - - /* - * Some controllers can only do basic operations using hardware - * sequencer. All other operations are supposed to be carried out - * using software sequencer. - */ - if (ispi->swseq_reg) { - /* Disable #SMI generation from SW sequencer */ - val = readl(ispi->sregs + SSFSTS_CTL); - val &= ~SSFSTS_CTL_FSMIE; - writel(val, ispi->sregs + SSFSTS_CTL); - } - - /* Check controller's lock status */ - val = readl(ispi->base + HSFSTS_CTL); - ispi->locked = !!(val & HSFSTS_CTL_FLOCKDN); - - if (ispi->locked && ispi->sregs) { - /* - * BIOS programs allowed opcodes and then locks down the - * register. So read back what opcodes it decided to support. - * That's the set we are going to support as well. 
- */ - opmenu0 = readl(ispi->sregs + OPMENU0); - opmenu1 = readl(ispi->sregs + OPMENU1); - - if (opmenu0 && opmenu1) { - for (i = 0; i < ARRAY_SIZE(ispi->opcodes) / 2; i++) { - ispi->opcodes[i] = opmenu0 >> i * 8; - ispi->opcodes[i + 4] = opmenu1 >> i * 8; - } - } - } - - intel_spi_dump_regs(ispi); - - return 0; -} - static int intel_spi_opcode_index(struct intel_spi *ispi, u8 opcode, int optype) { int i; @@ -536,7 +425,6 @@ static int intel_spi_sw_cycle(struct intel_spi *ispi, u8 opcode, size_t len, default: return -EINVAL; } - } writel(val, ispi->sregs + SSFSTS_CTL); @@ -553,31 +441,35 @@ static int intel_spi_sw_cycle(struct intel_spi *ispi, u8 opcode, size_t len, return 0; } -static int intel_spi_read_reg(struct spi_nor *nor, u8 opcode, u8 *buf, - size_t len) +static int intel_spi_read_reg(struct intel_spi *ispi, + const struct intel_spi_mem_op *iop, + const struct spi_mem_op *op) { - struct intel_spi *ispi = nor->priv; + size_t nbytes = op->data.nbytes; + u8 opcode = op->cmd.opcode; int ret; /* Address of the first chip */ writel(0, ispi->base + FADDR); if (ispi->swseq_reg) - ret = intel_spi_sw_cycle(ispi, opcode, len, + ret = intel_spi_sw_cycle(ispi, opcode, nbytes, OPTYPE_READ_NO_ADDR); else - ret = intel_spi_hw_cycle(ispi, opcode, len); + ret = intel_spi_hw_cycle(ispi, opcode, nbytes); if (ret) return ret; - return intel_spi_read_block(ispi, buf, len); + return intel_spi_read_block(ispi, op->data.buf.in, nbytes); } -static int intel_spi_write_reg(struct spi_nor *nor, u8 opcode, const u8 *buf, - size_t len) +static int intel_spi_write_reg(struct intel_spi *ispi, + const struct intel_spi_mem_op *iop, + const struct spi_mem_op *op) { - struct intel_spi *ispi = nor->priv; + size_t nbytes = op->data.nbytes; + u8 opcode = op->cmd.opcode; int ret; /* @@ -622,23 +514,25 @@ static int intel_spi_write_reg(struct spi_nor *nor, u8 opcode, const u8 *buf, writel(0, ispi->base + FADDR); /* Write the value beforehand */ - ret = intel_spi_write_block(ispi, buf, len); + ret = 
intel_spi_write_block(ispi, op->data.buf.out, nbytes); if (ret) return ret; if (ispi->swseq_reg) - return intel_spi_sw_cycle(ispi, opcode, len, + return intel_spi_sw_cycle(ispi, opcode, nbytes, OPTYPE_WRITE_NO_ADDR); - return intel_spi_hw_cycle(ispi, opcode, len); + return intel_spi_hw_cycle(ispi, opcode, nbytes); } -static ssize_t intel_spi_read(struct spi_nor *nor, loff_t from, size_t len, - u_char *read_buf) +static int intel_spi_read(struct intel_spi *ispi, + const struct intel_spi_mem_op *iop, + const struct spi_mem_op *op) { - struct intel_spi *ispi = nor->priv; - size_t block_size, retlen = 0; + void *read_buf = op->data.buf.in; + size_t block_size, nbytes = op->data.nbytes; + u32 addr = op->addr.val; u32 val, status; - ssize_t ret; + int ret; /* * Atomic sequence is not expected with HW sequencer reads. Make @@ -647,24 +541,14 @@ static ssize_t intel_spi_read(struct spi_nor *nor, loff_t from, size_t len, if (WARN_ON_ONCE(ispi->atomic_preopcode)) ispi->atomic_preopcode = 0; - switch (nor->read_opcode) { - case SPINOR_OP_READ: - case SPINOR_OP_READ_FAST: - case SPINOR_OP_READ_4B: - case SPINOR_OP_READ_FAST_4B: - break; - default: - return -EINVAL; - } - - while (len > 0) { - block_size = min_t(size_t, len, INTEL_SPI_FIFO_SZ); + while (nbytes > 0) { + block_size = min_t(size_t, nbytes, INTEL_SPI_FIFO_SZ); /* Read cannot cross 4K boundary */ - block_size = min_t(loff_t, from + block_size, - round_up(from + 1, SZ_4K)) - from; + block_size = min_t(loff_t, addr + block_size, + round_up(addr + 1, SZ_4K)) - addr; - writel(from, ispi->base + FADDR); + writel(addr, ispi->base + FADDR); val = readl(ispi->base + HSFSTS_CTL); val &= ~(HSFSTS_CTL_FDBC_MASK | HSFSTS_CTL_FCYCLE_MASK); @@ -685,8 +569,7 @@ static ssize_t intel_spi_read(struct spi_nor *nor, loff_t from, size_t len, ret = -EACCES; if (ret < 0) { - dev_err(ispi->dev, "read error: %llx: %#x\n", from, - status); + dev_err(ispi->dev, "read error: %x: %#x\n", addr, status); return ret; } @@ -694,34 +577,35 @@ static 
ssize_t intel_spi_read(struct spi_nor *nor, loff_t from, size_t len, if (ret) return ret; - len -= block_size; - from += block_size; - retlen += block_size; + nbytes -= block_size; + addr += block_size; read_buf += block_size; } - return retlen; + return 0; } -static ssize_t intel_spi_write(struct spi_nor *nor, loff_t to, size_t len, - const u_char *write_buf) +static int intel_spi_write(struct intel_spi *ispi, + const struct intel_spi_mem_op *iop, + const struct spi_mem_op *op) { - struct intel_spi *ispi = nor->priv; - size_t block_size, retlen = 0; + size_t block_size, nbytes = op->data.nbytes; + const void *write_buf = op->data.buf.out; + u32 addr = op->addr.val; u32 val, status; - ssize_t ret; + int ret; /* Not needed with HW sequencer write, make sure it is cleared */ ispi->atomic_preopcode = 0; - while (len > 0) { - block_size = min_t(size_t, len, INTEL_SPI_FIFO_SZ); + while (nbytes > 0) { + block_size = min_t(size_t, nbytes, INTEL_SPI_FIFO_SZ); /* Write cannot cross 4K boundary */ - block_size = min_t(loff_t, to + block_size, - round_up(to + 1, SZ_4K)) - to; + block_size = min_t(loff_t, addr + block_size, + round_up(addr + 1, SZ_4K)) - addr; - writel(to, ispi->base + FADDR); + writel(addr, ispi->base + FADDR); val = readl(ispi->base + HSFSTS_CTL); val &= ~(HSFSTS_CTL_FDBC_MASK | HSFSTS_CTL_FCYCLE_MASK); @@ -752,79 +636,476 @@ static ssize_t intel_spi_write(struct spi_nor *nor, loff_t to, size_t len, ret = -EACCES; if (ret < 0) { - dev_err(ispi->dev, "write error: %llx: %#x\n", to, - status); + dev_err(ispi->dev, "write error: %x: %#x\n", addr, status); return ret; } - len -= block_size; - to += block_size; - retlen += block_size; + nbytes -= block_size; + addr += block_size; write_buf += block_size; } - return retlen; + return 0; } -static int intel_spi_erase(struct spi_nor *nor, loff_t offs) +static int intel_spi_erase(struct intel_spi *ispi, + const struct intel_spi_mem_op *iop, + const struct spi_mem_op *op) { - size_t erase_size, len = 
nor->mtd.erasesize; - struct intel_spi *ispi = nor->priv; - u32 val, status, cmd; + u8 opcode = op->cmd.opcode; + u32 addr = op->addr.val; + u32 val, status; int ret; - /* If the hardware can do 64k erase use that when possible */ - if (len >= SZ_64K && ispi->erase_64k) { - cmd = HSFSTS_CTL_FCYCLE_ERASE_64K; - erase_size = SZ_64K; - } else { - cmd = HSFSTS_CTL_FCYCLE_ERASE; - erase_size = SZ_4K; + writel(addr, ispi->base + FADDR); + + if (ispi->swseq_erase) + return intel_spi_sw_cycle(ispi, opcode, 0, + OPTYPE_WRITE_WITH_ADDR); + + /* Not needed with HW sequencer erase, make sure it is cleared */ + ispi->atomic_preopcode = 0; + + val = readl(ispi->base + HSFSTS_CTL); + val &= ~(HSFSTS_CTL_FDBC_MASK | HSFSTS_CTL_FCYCLE_MASK); + val |= HSFSTS_CTL_AEL | HSFSTS_CTL_FCERR | HSFSTS_CTL_FDONE; + val |= HSFSTS_CTL_FGO; + val |= iop->replacement_op; + writel(val, ispi->base + HSFSTS_CTL); + + ret = intel_spi_wait_hw_busy(ispi); + if (ret) + return ret; + + status = readl(ispi->base + HSFSTS_CTL); + if (status & HSFSTS_CTL_FCERR) + return -EIO; + if (status & HSFSTS_CTL_AEL) + return -EACCES; + + return 0; +} + +static bool intel_spi_cmp_mem_op(const struct intel_spi_mem_op *iop, + const struct spi_mem_op *op) +{ + if (iop->mem_op.cmd.nbytes != op->cmd.nbytes || + iop->mem_op.cmd.buswidth != op->cmd.buswidth || + iop->mem_op.cmd.dtr != op->cmd.dtr || + iop->mem_op.cmd.opcode != op->cmd.opcode) + return false; + + if (iop->mem_op.addr.nbytes != op->addr.nbytes || + iop->mem_op.addr.dtr != op->addr.dtr) + return false; + + if (iop->mem_op.data.dir != op->data.dir || + iop->mem_op.data.dtr != op->data.dtr) + return false; + + if (iop->mem_op.data.dir != SPI_MEM_NO_DATA) { + if (iop->mem_op.data.buswidth != op->data.buswidth) + return false; } - if (ispi->swseq_erase) { - while (len > 0) { - writel(offs, ispi->base + FADDR); + return true; +} - ret = intel_spi_sw_cycle(ispi, nor->erase_opcode, - 0, OPTYPE_WRITE_WITH_ADDR); - if (ret) - return ret; +static const struct 
intel_spi_mem_op * +intel_spi_match_mem_op(struct intel_spi *ispi, const struct spi_mem_op *op) +{ + const struct intel_spi_mem_op *iop; - offs += erase_size; - len -= erase_size; + for (iop = ispi->mem_ops; iop->mem_op.cmd.opcode; iop++) { + if (intel_spi_cmp_mem_op(iop, op)) + break; + } + + return iop->mem_op.cmd.opcode ? iop : NULL; +} + +static bool intel_spi_supports_mem_op(struct spi_mem *mem, + const struct spi_mem_op *op) +{ + struct intel_spi *ispi = spi_master_get_devdata(mem->spi->master); + const struct intel_spi_mem_op *iop; + + iop = intel_spi_match_mem_op(ispi, op); + if (!iop) { + dev_dbg(ispi->dev, "%#x not supported\n", op->cmd.opcode); + return false; + } + + /* + * For software sequencer check that the opcode is actually + * present in the opmenu if it is locked. + */ + if (ispi->swseq_reg && ispi->locked) { + int i; + + /* Check if it is in the locked opcodes list */ + for (i = 0; i < ARRAY_SIZE(ispi->opcodes); i++) { + if (ispi->opcodes[i] == op->cmd.opcode) + return true; } - return 0; + dev_dbg(ispi->dev, "%#x not supported\n", op->cmd.opcode); + return false; } - /* Not needed with HW sequencer erase, make sure it is cleared */ - ispi->atomic_preopcode = 0; + return true; +} - while (len > 0) { - writel(offs, ispi->base + FADDR); +static int intel_spi_exec_mem_op(struct spi_mem *mem, const struct spi_mem_op *op) +{ + struct intel_spi *ispi = spi_master_get_devdata(mem->spi->master); + const struct intel_spi_mem_op *iop; - val = readl(ispi->base + HSFSTS_CTL); - val &= ~(HSFSTS_CTL_FDBC_MASK | HSFSTS_CTL_FCYCLE_MASK); - val |= HSFSTS_CTL_AEL | HSFSTS_CTL_FCERR | HSFSTS_CTL_FDONE; - val |= cmd; - val |= HSFSTS_CTL_FGO; - writel(val, ispi->base + HSFSTS_CTL); + iop = intel_spi_match_mem_op(ispi, op); + if (!iop) + return -EOPNOTSUPP; - ret = intel_spi_wait_hw_busy(ispi); - if (ret) - return ret; + return iop->exec_op(ispi, iop, op); +} - status = readl(ispi->base + HSFSTS_CTL); - if (status & HSFSTS_CTL_FCERR) - return -EIO; - else if (status 
& HSFSTS_CTL_AEL) - return -EACCES; +static const char *intel_spi_get_name(struct spi_mem *mem) +{ + const struct intel_spi *ispi = spi_master_get_devdata(mem->spi->master); + + /* + * Return name of the flash controller device to be compatible + * with the MTD version. + */ + return dev_name(ispi->dev); +} + +static const struct spi_controller_mem_ops intel_spi_mem_ops = { + .supports_op = intel_spi_supports_mem_op, + .exec_op = intel_spi_exec_mem_op, + .get_name = intel_spi_get_name, +}; + +#define INTEL_SPI_OP_ADDR(__nbytes) \ + { \ + .nbytes = __nbytes, \ + } + +#define INTEL_SPI_OP_NO_DATA \ + { \ + .dir = SPI_MEM_NO_DATA, \ + } + +#define INTEL_SPI_OP_DATA_IN(__buswidth) \ + { \ + .dir = SPI_MEM_DATA_IN, \ + .buswidth = __buswidth, \ + } + +#define INTEL_SPI_OP_DATA_OUT(__buswidth) \ + { \ + .dir = SPI_MEM_DATA_OUT, \ + .buswidth = __buswidth, \ + } + +#define INTEL_SPI_MEM_OP(__cmd, __addr, __data, __exec_op) \ + { \ + .mem_op = { \ + .cmd = __cmd, \ + .addr = __addr, \ + .data = __data, \ + }, \ + .exec_op = __exec_op, \ + } + +#define INTEL_SPI_MEM_OP_REPL(__cmd, __addr, __data, __exec_op, __repl) \ + { \ + .mem_op = { \ + .cmd = __cmd, \ + .addr = __addr, \ + .data = __data, \ + }, \ + .exec_op = __exec_op, \ + .replacement_op = __repl, \ + } + +/* + * The controller handles pretty much everything internally based on the + * SFDP data but we want to make sure we only support the operations + * actually possible. Only check buswidth and transfer direction, the + * core validates data. 
+ */ +#define INTEL_SPI_GENERIC_OPS \ + /* Status register operations */ \ + INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDID, 1), \ + SPI_MEM_OP_NO_ADDR, \ + INTEL_SPI_OP_DATA_IN(1), \ + intel_spi_read_reg), \ + INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDSR, 1), \ + SPI_MEM_OP_NO_ADDR, \ + INTEL_SPI_OP_DATA_IN(1), \ + intel_spi_read_reg), \ + INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WRSR, 1), \ + SPI_MEM_OP_NO_ADDR, \ + INTEL_SPI_OP_DATA_OUT(1), \ + intel_spi_write_reg), \ + /* Normal read */ \ + INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ, 1), \ + INTEL_SPI_OP_ADDR(3), \ + INTEL_SPI_OP_DATA_IN(1), \ + intel_spi_read), \ + INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ, 1), \ + INTEL_SPI_OP_ADDR(3), \ + INTEL_SPI_OP_DATA_IN(2), \ + intel_spi_read), \ + INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ, 1), \ + INTEL_SPI_OP_ADDR(3), \ + INTEL_SPI_OP_DATA_IN(4), \ + intel_spi_read), \ + INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ, 1), \ + INTEL_SPI_OP_ADDR(4), \ + INTEL_SPI_OP_DATA_IN(1), \ + intel_spi_read), \ + INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ, 1), \ + INTEL_SPI_OP_ADDR(4), \ + INTEL_SPI_OP_DATA_IN(2), \ + intel_spi_read), \ + INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ, 1), \ + INTEL_SPI_OP_ADDR(4), \ + INTEL_SPI_OP_DATA_IN(4), \ + intel_spi_read), \ + /* Fast read */ \ + INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_FAST, 1), \ + INTEL_SPI_OP_ADDR(3), \ + INTEL_SPI_OP_DATA_IN(1), \ + intel_spi_read), \ + INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_FAST, 1), \ + INTEL_SPI_OP_ADDR(3), \ + INTEL_SPI_OP_DATA_IN(2), \ + intel_spi_read), \ + INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_FAST, 1), \ + INTEL_SPI_OP_ADDR(3), \ + INTEL_SPI_OP_DATA_IN(4), \ + intel_spi_read), \ + INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_FAST, 1), \ + INTEL_SPI_OP_ADDR(4), \ + INTEL_SPI_OP_DATA_IN(1), \ + intel_spi_read), \ + INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_FAST, 1), \ + INTEL_SPI_OP_ADDR(4), \ + INTEL_SPI_OP_DATA_IN(2), \ + intel_spi_read), \ + 
INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_FAST, 1), \ + INTEL_SPI_OP_ADDR(4), \ + INTEL_SPI_OP_DATA_IN(4), \ + intel_spi_read), \ + /* Read with 4-byte address opcode */ \ + INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_4B, 1), \ + INTEL_SPI_OP_ADDR(4), \ + INTEL_SPI_OP_DATA_IN(1), \ + intel_spi_read), \ + INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_4B, 1), \ + INTEL_SPI_OP_ADDR(4), \ + INTEL_SPI_OP_DATA_IN(2), \ + intel_spi_read), \ + INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_4B, 1), \ + INTEL_SPI_OP_ADDR(4), \ + INTEL_SPI_OP_DATA_IN(4), \ + intel_spi_read), \ + /* Fast read with 4-byte address opcode */ \ + INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_FAST_4B, 1), \ + INTEL_SPI_OP_ADDR(4), \ + INTEL_SPI_OP_DATA_IN(1), \ + intel_spi_read), \ + INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_FAST_4B, 1), \ + INTEL_SPI_OP_ADDR(4), \ + INTEL_SPI_OP_DATA_IN(2), \ + intel_spi_read), \ + INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_FAST_4B, 1), \ + INTEL_SPI_OP_ADDR(4), \ + INTEL_SPI_OP_DATA_IN(4), \ + intel_spi_read), \ + /* Write operations */ \ + INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_PP, 1), \ + INTEL_SPI_OP_ADDR(3), \ + INTEL_SPI_OP_DATA_OUT(1), \ + intel_spi_write), \ + INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_PP, 1), \ + INTEL_SPI_OP_ADDR(4), \ + INTEL_SPI_OP_DATA_OUT(1), \ + intel_spi_write), \ + INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_PP_4B, 1), \ + INTEL_SPI_OP_ADDR(4), \ + INTEL_SPI_OP_DATA_OUT(1), \ + intel_spi_write), \ + INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WREN, 1), \ + SPI_MEM_OP_NO_ADDR, \ + SPI_MEM_OP_NO_DATA, \ + intel_spi_write_reg), \ + INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WRDI, 1), \ + SPI_MEM_OP_NO_ADDR, \ + SPI_MEM_OP_NO_DATA, \ + intel_spi_write_reg), \ + /* Erase operations */ \ + INTEL_SPI_MEM_OP_REPL(SPI_MEM_OP_CMD(SPINOR_OP_BE_4K, 1), \ + INTEL_SPI_OP_ADDR(3), \ + SPI_MEM_OP_NO_DATA, \ + intel_spi_erase, \ + HSFSTS_CTL_FCYCLE_ERASE), \ + INTEL_SPI_MEM_OP_REPL(SPI_MEM_OP_CMD(SPINOR_OP_BE_4K, 1), \ + 
INTEL_SPI_OP_ADDR(4), \ + SPI_MEM_OP_NO_DATA, \ + intel_spi_erase, \ + HSFSTS_CTL_FCYCLE_ERASE), \ + INTEL_SPI_MEM_OP_REPL(SPI_MEM_OP_CMD(SPINOR_OP_BE_4K_4B, 1), \ + INTEL_SPI_OP_ADDR(4), \ + SPI_MEM_OP_NO_DATA, \ + intel_spi_erase, \ + HSFSTS_CTL_FCYCLE_ERASE) \ + +static const struct intel_spi_mem_op generic_mem_ops[] = { + INTEL_SPI_GENERIC_OPS, + { }, +}; + +static const struct intel_spi_mem_op erase_64k_mem_ops[] = { + INTEL_SPI_GENERIC_OPS, + /* 64k sector erase operations */ + INTEL_SPI_MEM_OP_REPL(SPI_MEM_OP_CMD(SPINOR_OP_SE, 1), + INTEL_SPI_OP_ADDR(3), + SPI_MEM_OP_NO_DATA, + intel_spi_erase, + HSFSTS_CTL_FCYCLE_ERASE_64K), + INTEL_SPI_MEM_OP_REPL(SPI_MEM_OP_CMD(SPINOR_OP_SE, 1), + INTEL_SPI_OP_ADDR(4), + SPI_MEM_OP_NO_DATA, + intel_spi_erase, + HSFSTS_CTL_FCYCLE_ERASE_64K), + INTEL_SPI_MEM_OP_REPL(SPI_MEM_OP_CMD(SPINOR_OP_SE_4B, 1), + INTEL_SPI_OP_ADDR(4), + SPI_MEM_OP_NO_DATA, + intel_spi_erase, + HSFSTS_CTL_FCYCLE_ERASE_64K), + { }, +}; + +static int intel_spi_init(struct intel_spi *ispi) +{ + u32 opmenu0, opmenu1, lvscc, uvscc, val; + bool erase_64k = false; + int i; + + switch (ispi->info->type) { + case INTEL_SPI_BYT: + ispi->sregs = ispi->base + BYT_SSFSTS_CTL; + ispi->pregs = ispi->base + BYT_PR; + ispi->nregions = BYT_FREG_NUM; + ispi->pr_num = BYT_PR_NUM; + ispi->swseq_reg = true; + break; + + case INTEL_SPI_LPT: + ispi->sregs = ispi->base + LPT_SSFSTS_CTL; + ispi->pregs = ispi->base + LPT_PR; + ispi->nregions = LPT_FREG_NUM; + ispi->pr_num = LPT_PR_NUM; + ispi->swseq_reg = true; + break; + + case INTEL_SPI_BXT: + ispi->sregs = ispi->base + BXT_SSFSTS_CTL; + ispi->pregs = ispi->base + BXT_PR; + ispi->nregions = BXT_FREG_NUM; + ispi->pr_num = BXT_PR_NUM; + erase_64k = true; + break; + + case INTEL_SPI_CNL: + ispi->sregs = NULL; + ispi->pregs = ispi->base + CNL_PR; + ispi->nregions = CNL_FREG_NUM; + ispi->pr_num = CNL_PR_NUM; + break; + + default: + return -EINVAL; + } + + /* Try to disable write protection if user asked to do so */ + if (writeable 
&& !intel_spi_set_writeable(ispi)) { + dev_warn(ispi->dev, "can't disable chip write protection\n"); + writeable = false; + } - offs += erase_size; - len -= erase_size; + /* Disable #SMI generation from HW sequencer */ + val = readl(ispi->base + HSFSTS_CTL); + val &= ~HSFSTS_CTL_FSMIE; + writel(val, ispi->base + HSFSTS_CTL); + + /* + * Determine whether erase operation should use HW or SW sequencer. + * + * The HW sequencer has a predefined list of opcodes, with only the + * erase opcode being programmable in LVSCC and UVSCC registers. + * If these registers don't contain a valid erase opcode, erase + * cannot be done using HW sequencer. + */ + lvscc = readl(ispi->base + LVSCC); + uvscc = readl(ispi->base + UVSCC); + if (!(lvscc & ERASE_OPCODE_MASK) || !(uvscc & ERASE_OPCODE_MASK)) + ispi->swseq_erase = true; + /* SPI controller on Intel BXT supports 64K erase opcode */ + if (ispi->info->type == INTEL_SPI_BXT && !ispi->swseq_erase) + if (!(lvscc & ERASE_64K_OPCODE_MASK) || + !(uvscc & ERASE_64K_OPCODE_MASK)) + erase_64k = false; + + if (!ispi->sregs && (ispi->swseq_reg || ispi->swseq_erase)) { + dev_err(ispi->dev, "software sequencer not supported, but required\n"); + return -EINVAL; } + /* + * Some controllers can only do basic operations using hardware + * sequencer. All other operations are supposed to be carried out + * using software sequencer. + */ + if (ispi->swseq_reg) { + /* Disable #SMI generation from SW sequencer */ + val = readl(ispi->sregs + SSFSTS_CTL); + val &= ~SSFSTS_CTL_FSMIE; + writel(val, ispi->sregs + SSFSTS_CTL); + } + + /* Check controller's lock status */ + val = readl(ispi->base + HSFSTS_CTL); + ispi->locked = !!(val & HSFSTS_CTL_FLOCKDN); + + if (ispi->locked && ispi->sregs) { + /* + * BIOS programs allowed opcodes and then locks down the + * register. So read back what opcodes it decided to support. + * That's the set we are going to support as well. 
+ */ + opmenu0 = readl(ispi->sregs + OPMENU0); + opmenu1 = readl(ispi->sregs + OPMENU1); + + if (opmenu0 && opmenu1) { + for (i = 0; i < ARRAY_SIZE(ispi->opcodes) / 2; i++) { + ispi->opcodes[i] = opmenu0 >> i * 8; + ispi->opcodes[i + 4] = opmenu1 >> i * 8; + } + } + } + + if (erase_64k) { + dev_dbg(ispi->dev, "Using erase_64k memory operations"); + ispi->mem_ops = erase_64k_mem_ops; + } else { + dev_dbg(ispi->dev, "Using generic memory operations"); + ispi->mem_ops = generic_mem_ops; + } + + intel_spi_dump_regs(ispi); return 0; } @@ -896,70 +1177,74 @@ static void intel_spi_fill_partition(struct intel_spi *ispi, } } -static const struct spi_nor_controller_ops intel_spi_controller_ops = { - .read_reg = intel_spi_read_reg, - .write_reg = intel_spi_write_reg, - .read = intel_spi_read, - .write = intel_spi_write, - .erase = intel_spi_erase, -}; +static int intel_spi_populate_chip(struct intel_spi *ispi) +{ + struct flash_platform_data *pdata; + struct spi_board_info chip; + + pdata = devm_kzalloc(ispi->dev, sizeof(*pdata), GFP_KERNEL); + if (!pdata) + return -ENOMEM; + + pdata->nr_parts = 1; + pdata->parts = devm_kcalloc(ispi->dev, sizeof(*pdata->parts), + pdata->nr_parts, GFP_KERNEL); + if (!pdata->parts) + return -ENOMEM; + + intel_spi_fill_partition(ispi, pdata->parts); + + memset(&chip, 0, sizeof(chip)); + snprintf(chip.modalias, 8, "spi-nor"); + chip.platform_data = pdata; -struct intel_spi *intel_spi_probe(struct device *dev, - struct resource *mem, const struct intel_spi_boardinfo *info) + return spi_new_device(ispi->master, &chip) ? 0 : -ENODEV; +} + +/** + * intel_spi_probe() - Probe the Intel SPI flash controller + * @dev: Pointer to the parent device + * @mem: MMIO resource + * @info: Platform specific information + * + * Probes Intel SPI flash controller and creates the flash chip device. + * Returns %0 on success and negative errno in case of failure. 
+ */ +int intel_spi_probe(struct device *dev, struct resource *mem, + const struct intel_spi_boardinfo *info) { - const struct spi_nor_hwcaps hwcaps = { - .mask = SNOR_HWCAPS_READ | - SNOR_HWCAPS_READ_FAST | - SNOR_HWCAPS_PP, - }; - struct mtd_partition part; + struct spi_controller *master; struct intel_spi *ispi; int ret; - if (!info || !mem) - return ERR_PTR(-EINVAL); + master = devm_spi_alloc_master(dev, sizeof(*ispi)); + if (!master) + return -ENOMEM; - ispi = devm_kzalloc(dev, sizeof(*ispi), GFP_KERNEL); - if (!ispi) - return ERR_PTR(-ENOMEM); + master->mem_ops = &intel_spi_mem_ops; + + ispi = spi_master_get_devdata(master); ispi->base = devm_ioremap_resource(dev, mem); if (IS_ERR(ispi->base)) - return ERR_CAST(ispi->base); + return PTR_ERR(ispi->base); ispi->dev = dev; + ispi->master = master; ispi->info = info; ret = intel_spi_init(ispi); if (ret) - return ERR_PTR(ret); - - ispi->nor.dev = ispi->dev; - ispi->nor.priv = ispi; - ispi->nor.controller_ops = &intel_spi_controller_ops; - - ret = spi_nor_scan(&ispi->nor, NULL, &hwcaps); - if (ret) { - dev_info(dev, "failed to locate the chip\n"); - return ERR_PTR(ret); - } - - intel_spi_fill_partition(ispi, &part); + return ret; - ret = mtd_device_register(&ispi->nor.mtd, &part, 1); + ret = devm_spi_register_master(dev, master); if (ret) - return ERR_PTR(ret); + return ret; - return ispi; + return intel_spi_populate_chip(ispi); } EXPORT_SYMBOL_GPL(intel_spi_probe); -int intel_spi_remove(struct intel_spi *ispi) -{ - return mtd_device_unregister(&ispi->nor.mtd); -} -EXPORT_SYMBOL_GPL(intel_spi_remove); - MODULE_DESCRIPTION("Intel PCH/PCU SPI flash core driver"); MODULE_AUTHOR("Mika Westerberg "); MODULE_LICENSE("GPL v2"); diff --git a/drivers/spi/spi-intel.h b/drivers/spi/spi-intel.h new file mode 100644 index 0000000000000..a4f0327a46ff2 --- /dev/null +++ b/drivers/spi/spi-intel.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Intel PCH/PCU SPI flash driver. 
+ * + * Copyright (C) 2016 - 2022, Intel Corporation + * Author: Mika Westerberg + */ + +#ifndef SPI_INTEL_H +#define SPI_INTEL_H + +#include + +struct resource; + +int intel_spi_probe(struct device *dev, struct resource *mem, + const struct intel_spi_boardinfo *info); + +#endif /* SPI_INTEL_H */ diff --git a/include/linux/mfd/lpc_ich.h b/include/linux/mfd/lpc_ich.h index 39967a5eca6d6..ea4a4b1b246a2 100644 --- a/include/linux/mfd/lpc_ich.h +++ b/include/linux/mfd/lpc_ich.h @@ -8,7 +8,7 @@ #ifndef LPC_ICH_H #define LPC_ICH_H -#include +#include /* GPIO resources */ #define ICH_RES_GPIO 0 diff --git a/include/linux/platform_data/x86/intel-spi.h b/include/linux/platform_data/x86/spi-intel.h similarity index 89% rename from include/linux/platform_data/x86/intel-spi.h rename to include/linux/platform_data/x86/spi-intel.h index 7dda3f6904654..a512ec37abbb5 100644 --- a/include/linux/platform_data/x86/intel-spi.h +++ b/include/linux/platform_data/x86/spi-intel.h @@ -6,8 +6,8 @@ * Author: Mika Westerberg */ -#ifndef INTEL_SPI_PDATA_H -#define INTEL_SPI_PDATA_H +#ifndef SPI_INTEL_PDATA_H +#define SPI_INTEL_PDATA_H enum intel_spi_type { INTEL_SPI_BYT = 1, @@ -28,4 +28,4 @@ struct intel_spi_boardinfo { void *data; }; -#endif /* INTEL_SPI_PDATA_H */ +#endif /* SPI_INTEL_PDATA_H */ -- GitLab From 4ab13489735d9b5b6e91634eab83922914a3310c Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 9 Feb 2022 15:27:06 +0300 Subject: [PATCH 0370/1586] Documentation / MTD: Rename the intel-spi driver Since the driver is renamed (and moved) update the BIOS upgrade guide accordingly from intel-spi to spi-intel. Keep the guide under MTD documentation because this is pretty much still about MTD and SPI-NOR. 
Signed-off-by: Mika Westerberg Reviewed-by: Andy Shevchenko Reviewed-by: Mauro Lima Reviewed-by: Tudor Ambarus Link: https://lore.kernel.org/r/20220209122706.42439-4-mika.westerberg@linux.intel.com Signed-off-by: Mark Brown --- Documentation/driver-api/mtd/index.rst | 2 +- .../driver-api/mtd/{intel-spi.rst => spi-intel.rst} | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) rename Documentation/driver-api/mtd/{intel-spi.rst => spi-intel.rst} (94%) diff --git a/Documentation/driver-api/mtd/index.rst b/Documentation/driver-api/mtd/index.rst index 436ba5a851d7c..6a4278f409d78 100644 --- a/Documentation/driver-api/mtd/index.rst +++ b/Documentation/driver-api/mtd/index.rst @@ -7,6 +7,6 @@ Memory Technology Device (MTD) .. toctree:: :maxdepth: 1 - intel-spi + spi-intel nand_ecc spi-nor diff --git a/Documentation/driver-api/mtd/intel-spi.rst b/Documentation/driver-api/mtd/spi-intel.rst similarity index 94% rename from Documentation/driver-api/mtd/intel-spi.rst rename to Documentation/driver-api/mtd/spi-intel.rst index 0465f68792625..df854f20ead15 100644 --- a/Documentation/driver-api/mtd/intel-spi.rst +++ b/Documentation/driver-api/mtd/spi-intel.rst @@ -1,5 +1,5 @@ ============================== -Upgrading BIOS using intel-spi +Upgrading BIOS using spi-intel ============================== Many Intel CPUs like Baytrail and Braswell include SPI serial flash host @@ -11,12 +11,12 @@ avoid accidental (or on purpose) overwrite of the content. Not all manufacturers protect the SPI serial flash, mainly because it allows upgrading the BIOS image directly from an OS. -The intel-spi driver makes it possible to read and write the SPI serial +The spi-intel driver makes it possible to read and write the SPI serial flash, if certain protection bits are not set and locked. If it finds any of them set, the whole MTD device is made read-only to prevent partial overwrites. 
By default the driver exposes SPI serial flash contents as read-only but it can be changed from kernel command line, -passing "intel-spi.writeable=1". +passing "spi_intel.writeable=1". Please keep in mind that overwriting the BIOS image on SPI serial flash might render the machine unbootable and requires special equipment like @@ -32,7 +32,7 @@ Linux. serial flash. Distros like Debian and Fedora have this prepackaged with name "mtd-utils". - 3) Add "intel-spi.writeable=1" to the kernel command line and reboot + 3) Add "spi_intel.writeable=1" to the kernel command line and reboot the board (you can also reload the driver passing "writeable=1" as module parameter to modprobe). -- GitLab From 5790597d7113faabb1714d3d1efa268e36eb4811 Mon Sep 17 00:00:00 2001 From: Li-hao Kuo Date: Mon, 14 Feb 2022 10:20:11 +0800 Subject: [PATCH 0371/1586] spi: Fix warning for Clang build and simplify code Clang build fails with spi-sunplus-sp7021.c:405:2: error: variable 'ret' is used uninitialized whenever switch default is taken default: simplify code Restore initializing ret. 
and add return error at default Fixes: 47e8fe57a66f ("spi: Modify irq request position and modify parameters") Reported-by: Tom Rix Reported-by: kernel test robot Reported-by: Nathan Chancellor Reported-by: Mark Brown Signed-off-by: Li-hao Kuo Link: https://lore.kernel.org/r/7d91e6ce29f9a8df2c53a47b4b977664020e237a.1644805060.git.lhjeff911@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-sunplus-sp7021.c | 38 ++++++++------------------------ 1 file changed, 9 insertions(+), 29 deletions(-) diff --git a/drivers/spi/spi-sunplus-sp7021.c b/drivers/spi/spi-sunplus-sp7021.c index ba5ed9f7277a3..ade7a0fca8cb3 100644 --- a/drivers/spi/spi-sunplus-sp7021.c +++ b/drivers/spi/spi-sunplus-sp7021.c @@ -69,12 +69,6 @@ #define SP7021_SPI_DATA_SIZE (255) #define SP7021_FIFO_DATA_LEN (16) -enum SP_SPI_MODE { - SP7021_SLAVE_READ = 0, - SP7021_SLAVE_WRITE = 1, - SP7021_SPI_IDLE = 2, -}; - enum { SP7021_MASTER_MODE = 0, SP7021_SLAVE_MODE = 1, @@ -375,40 +369,26 @@ static int sp7021_spi_slave_transfer_one(struct spi_controller *ctlr, struct spi { struct sp7021_spi_ctlr *pspim = spi_master_get_devdata(ctlr); struct device *dev = pspim->dev; - int mode, ret; + int ret; - mode = SP7021_SPI_IDLE; - if (xfer->tx_buf && xfer->rx_buf) { - dev_dbg(&ctlr->dev, "%s() wrong command\n", __func__); - return -EINVAL; - } else if (xfer->tx_buf) { + if (xfer->tx_buf && !xfer->rx_buf) { xfer->tx_dma = dma_map_single(dev, (void *)xfer->tx_buf, xfer->len, DMA_TO_DEVICE); if (dma_mapping_error(dev, xfer->tx_dma)) return -ENOMEM; - mode = SP7021_SLAVE_WRITE; - } else if (xfer->rx_buf) { + ret = sp7021_spi_slave_tx(spi, xfer); + dma_unmap_single(dev, xfer->tx_dma, xfer->len, DMA_TO_DEVICE); + } else if (xfer->rx_buf && !xfer->tx_buf) { xfer->rx_dma = dma_map_single(dev, xfer->rx_buf, xfer->len, DMA_FROM_DEVICE); if (dma_mapping_error(dev, xfer->rx_dma)) return -ENOMEM; - mode = SP7021_SLAVE_READ; - } - - switch (mode) { - case SP7021_SLAVE_WRITE: - ret = sp7021_spi_slave_tx(spi, xfer); - break; - 
case SP7021_SLAVE_READ: ret = sp7021_spi_slave_rx(spi, xfer); - break; - default: - break; - } - if (xfer->tx_buf) - dma_unmap_single(dev, xfer->tx_dma, xfer->len, DMA_TO_DEVICE); - if (xfer->rx_buf) dma_unmap_single(dev, xfer->rx_dma, xfer->len, DMA_FROM_DEVICE); + } else { + dev_dbg(&ctlr->dev, "%s() wrong command\n", __func__); + return -EINVAL; + } spi_finalize_current_transfer(ctlr); return ret; -- GitLab From f48dc6b9664963107e500aecfc2f4df27dc5afb6 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 11 Feb 2022 00:19:54 +0100 Subject: [PATCH 0372/1586] spi: Retire legacy GPIO handling All drivers using GPIOs as chip select have been rewritten to use GPIO descriptors passing the ->use_gpio_descriptors flag. Retire the code and fields used by the legacy GPIO API. Do not drop the ->use_gpio_descriptors flag: it now only indicates that we want to use GPIOs in addition to native chip selects. Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20220210231954.807904-1-linus.walleij@linaro.org Signed-off-by: Mark Brown --- drivers/spi/spi.c | 125 +++++++++------------------------------- include/linux/spi/spi.h | 14 +---- 2 files changed, 30 insertions(+), 109 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index d9832d8e9f446..85f8ae4cc0c0d 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -542,7 +541,6 @@ struct spi_device *spi_alloc_device(struct spi_controller *ctlr) spi->dev.parent = &ctlr->dev; spi->dev.bus = &spi_bus_type; spi->dev.release = spidev_release; - spi->cs_gpio = -ENOENT; spi->mode = ctlr->buswidth_override_bits; spin_lock_init(&spi->statistics.lock); @@ -606,11 +604,8 @@ static int __spi_add_device(struct spi_device *spi) return -ENODEV; } - /* Descriptors take precedence */ if (ctlr->cs_gpiods) spi->cs_gpiod = ctlr->cs_gpiods[spi->chip_select]; - else if (ctlr->cs_gpios) - spi->cs_gpio = ctlr->cs_gpios[spi->chip_select]; /* * 
Drivers may modify this initial i/o setup, but will @@ -940,39 +935,30 @@ static void spi_set_cs(struct spi_device *spi, bool enable, bool force) spi->controller->last_cs_enable = enable; spi->controller->last_cs_mode_high = spi->mode & SPI_CS_HIGH; - if ((spi->cs_gpiod || gpio_is_valid(spi->cs_gpio) || - !spi->controller->set_cs_timing) && !activate) { + if ((spi->cs_gpiod || !spi->controller->set_cs_timing) && !activate) { spi_delay_exec(&spi->cs_hold, NULL); } if (spi->mode & SPI_CS_HIGH) enable = !enable; - if (spi->cs_gpiod || gpio_is_valid(spi->cs_gpio)) { + if (spi->cs_gpiod) { if (!(spi->mode & SPI_NO_CS)) { - if (spi->cs_gpiod) { - /* - * Historically ACPI has no means of the GPIO polarity and - * thus the SPISerialBus() resource defines it on the per-chip - * basis. In order to avoid a chain of negations, the GPIO - * polarity is considered being Active High. Even for the cases - * when _DSD() is involved (in the updated versions of ACPI) - * the GPIO CS polarity must be defined Active High to avoid - * ambiguity. That's why we use enable, that takes SPI_CS_HIGH - * into account. - */ - if (has_acpi_companion(&spi->dev)) - gpiod_set_value_cansleep(spi->cs_gpiod, !enable); - else - /* Polarity handled by GPIO library */ - gpiod_set_value_cansleep(spi->cs_gpiod, activate); - } else { - /* - * Invert the enable line, as active low is - * default for SPI. - */ - gpio_set_value_cansleep(spi->cs_gpio, !enable); - } + /* + * Historically ACPI has no means of the GPIO polarity and + * thus the SPISerialBus() resource defines it on the per-chip + * basis. In order to avoid a chain of negations, the GPIO + * polarity is considered being Active High. Even for the cases + * when _DSD() is involved (in the updated versions of ACPI) + * the GPIO CS polarity must be defined Active High to avoid + * ambiguity. That's why we use enable, that takes SPI_CS_HIGH + * into account. 
+ */ + if (has_acpi_companion(&spi->dev)) + gpiod_set_value_cansleep(spi->cs_gpiod, !enable); + else + /* Polarity handled by GPIO library */ + gpiod_set_value_cansleep(spi->cs_gpiod, activate); } /* Some SPI masters need both GPIO CS & slave_select */ if ((spi->controller->flags & SPI_MASTER_GPIO_SS) && @@ -982,8 +968,7 @@ static void spi_set_cs(struct spi_device *spi, bool enable, bool force) spi->controller->set_cs(spi, !enable); } - if (spi->cs_gpiod || gpio_is_valid(spi->cs_gpio) || - !spi->controller->set_cs_timing) { + if (spi->cs_gpiod || !spi->controller->set_cs_timing) { if (activate) spi_delay_exec(&spi->cs_setup, NULL); else @@ -2827,46 +2812,6 @@ struct spi_controller *__devm_spi_alloc_controller(struct device *dev, } EXPORT_SYMBOL_GPL(__devm_spi_alloc_controller); -#ifdef CONFIG_OF -static int of_spi_get_gpio_numbers(struct spi_controller *ctlr) -{ - int nb, i, *cs; - struct device_node *np = ctlr->dev.of_node; - - if (!np) - return 0; - - nb = of_gpio_named_count(np, "cs-gpios"); - ctlr->num_chipselect = max_t(int, nb, ctlr->num_chipselect); - - /* Return error only for an incorrectly formed cs-gpios property */ - if (nb == 0 || nb == -ENOENT) - return 0; - else if (nb < 0) - return nb; - - cs = devm_kcalloc(&ctlr->dev, ctlr->num_chipselect, sizeof(int), - GFP_KERNEL); - ctlr->cs_gpios = cs; - - if (!ctlr->cs_gpios) - return -ENOMEM; - - for (i = 0; i < ctlr->num_chipselect; i++) - cs[i] = -ENOENT; - - for (i = 0; i < nb; i++) - cs[i] = of_get_named_gpio(np, "cs-gpios", i); - - return 0; -} -#else -static int of_spi_get_gpio_numbers(struct spi_controller *ctlr) -{ - return 0; -} -#endif - /** * spi_get_gpio_descs() - grab chip select GPIOs for the master * @ctlr: The SPI master to grab GPIO descriptors for @@ -3051,22 +2996,15 @@ int spi_register_controller(struct spi_controller *ctlr) */ dev_set_name(&ctlr->dev, "spi%u", ctlr->bus_num); - if (!spi_controller_is_slave(ctlr)) { - if (ctlr->use_gpio_descriptors) { - status = spi_get_gpio_descs(ctlr); - 
if (status) - goto free_bus_id; - /* - * A controller using GPIO descriptors always - * supports SPI_CS_HIGH if need be. - */ - ctlr->mode_bits |= SPI_CS_HIGH; - } else { - /* Legacy code path for GPIOs from DT */ - status = of_spi_get_gpio_numbers(ctlr); - if (status) - goto free_bus_id; - } + if (!spi_controller_is_slave(ctlr) && ctlr->use_gpio_descriptors) { + status = spi_get_gpio_descs(ctlr); + if (status) + goto free_bus_id; + /* + * A controller using GPIO descriptors always + * supports SPI_CS_HIGH if need be. + */ + ctlr->mode_bits |= SPI_CS_HIGH; } /* @@ -3555,12 +3493,6 @@ int spi_setup(struct spi_device *spi) */ bad_bits = spi->mode & ~(spi->controller->mode_bits | SPI_CS_WORD | SPI_NO_TX | SPI_NO_RX); - /* - * Nothing prevents from working with active-high CS in case if it - * is driven by GPIO. - */ - if (gpio_is_valid(spi->cs_gpio)) - bad_bits &= ~SPI_CS_HIGH; ugly_bits = bad_bits & (SPI_TX_DUAL | SPI_TX_QUAD | SPI_TX_OCTAL | SPI_RX_DUAL | SPI_RX_QUAD | SPI_RX_OCTAL); @@ -3686,8 +3618,7 @@ static int __spi_validate(struct spi_device *spi, struct spi_message *message) * cs_change is set for each transfer. */ if ((spi->mode & SPI_CS_WORD) && (!(ctlr->mode_bits & SPI_CS_WORD) || - spi->cs_gpiod || - gpio_is_valid(spi->cs_gpio))) { + spi->cs_gpiod)) { size_t maxsize; int ret; diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index c3746ff35691e..579d71cdf6fab 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -137,9 +137,6 @@ extern int spi_delay_exec(struct spi_delay *_delay, struct spi_transfer *xfer); * for driver coldplugging, and in uevents used for hotplugging * @driver_override: If the name of a driver is written to this attribute, then * the device will bind to the named driver and only the named driver. - * @cs_gpio: LEGACY: gpio number of the chipselect line (optional, -ENOENT when - * not using a GPIO line) use cs_gpiod in new drivers by opting in on - * the spi_master. 
* @cs_gpiod: gpio descriptor of the chipselect line (optional, NULL when * not using a GPIO line) * @word_delay: delay to be inserted between consecutive @@ -186,7 +183,6 @@ struct spi_device { void *controller_data; char modalias[SPI_NAME_SIZE]; const char *driver_override; - int cs_gpio; /* LEGACY: chip select gpio */ struct gpio_desc *cs_gpiod; /* chip select gpio desc */ struct spi_delay word_delay; /* inter-word delay */ /* CS delays */ @@ -418,17 +414,12 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * controller has native support for memory like operations. * @unprepare_message: undo any work done by prepare_message(). * @slave_abort: abort the ongoing transfer request on an SPI slave controller - * @cs_gpios: LEGACY: array of GPIO descs to use as chip select lines; one per - * CS number. Any individual value may be -ENOENT for CS lines that - * are not GPIOs (driven by the SPI controller itself). Use the cs_gpiods - * in new drivers. * @cs_gpiods: Array of GPIO descs to use as chip select lines; one per CS * number. Any individual value may be NULL for CS lines that * are not GPIOs (driven by the SPI controller itself). * @use_gpio_descriptors: Turns on the code in the SPI core to parse and grab - * GPIO descriptors rather than using global GPIO numbers grabbed by the - * driver. This will fill in @cs_gpiods and @cs_gpios should not be used, - * and SPI devices will have the cs_gpiod assigned rather than cs_gpio. + * GPIO descriptors. This will fill in @cs_gpiods and SPI devices will have + * the cs_gpiod assigned if a GPIO line is found for the chipselect. * @unused_native_cs: When cs_gpiods is used, spi_register_controller() will * fill in this field with the first unused native CS, to be used by SPI * controller drivers that need to drive a native CS when using GPIO CS. 
@@ -642,7 +633,6 @@ struct spi_controller { const struct spi_controller_mem_ops *mem_ops; /* gpio chip select */ - int *cs_gpios; struct gpio_desc **cs_gpiods; bool use_gpio_descriptors; s8 unused_native_cs; -- GitLab From 7ba564722d98e3e7bc3922ad4f2885ca0336674e Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:44 -0800 Subject: [PATCH 0373/1586] iommu/sva: Rename CONFIG_IOMMU_SVA_LIB to CONFIG_IOMMU_SVA This CONFIG option originally only referred to the Shared Virtual Address (SVA) library. But it is now also used for non-library portions of code. Drop the "_LIB" suffix so that there is just one configuration option for all code relating to SVA. Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Thomas Gleixner Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20220207230254.3342514-2-fenghua.yu@intel.com --- drivers/iommu/Kconfig | 6 +++--- drivers/iommu/Makefile | 2 +- drivers/iommu/intel/Kconfig | 2 +- drivers/iommu/iommu-sva-lib.h | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 3eb68fa1b8cc0..c79a0df090c08 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -144,8 +144,8 @@ config IOMMU_DMA select IRQ_MSI_IOMMU select NEED_SG_DMA_LENGTH -# Shared Virtual Addressing library -config IOMMU_SVA_LIB +# Shared Virtual Addressing +config IOMMU_SVA bool select IOASID @@ -379,7 +379,7 @@ config ARM_SMMU_V3 config ARM_SMMU_V3_SVA bool "Shared Virtual Addressing support for the ARM SMMUv3" depends on ARM_SMMU_V3 - select IOMMU_SVA_LIB + select IOMMU_SVA select MMU_NOTIFIER help Support for sharing process address spaces with devices using the diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index bc7f730edbb0b..44475a9b3eeaf 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -27,6 +27,6 @@ obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o obj-$(CONFIG_S390_IOMMU) += s390-iommu.o 
obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o -obj-$(CONFIG_IOMMU_SVA_LIB) += iommu-sva-lib.o io-pgfault.o +obj-$(CONFIG_IOMMU_SVA) += iommu-sva-lib.o io-pgfault.o obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o obj-$(CONFIG_APPLE_DART) += apple-dart.o diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index 247d0f2d5fdf7..39a06d245f12e 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -52,7 +52,7 @@ config INTEL_IOMMU_SVM select PCI_PRI select MMU_NOTIFIER select IOASID - select IOMMU_SVA_LIB + select IOMMU_SVA help Shared Virtual Memory (SVM) provides a facility for devices to access DMA resources through process address space by diff --git a/drivers/iommu/iommu-sva-lib.h b/drivers/iommu/iommu-sva-lib.h index 031155010ca85..95dc3ebc19283 100644 --- a/drivers/iommu/iommu-sva-lib.h +++ b/drivers/iommu/iommu-sva-lib.h @@ -17,7 +17,7 @@ struct device; struct iommu_fault; struct iopf_queue; -#ifdef CONFIG_IOMMU_SVA_LIB +#ifdef CONFIG_IOMMU_SVA int iommu_queue_iopf(struct iommu_fault *fault, void *cookie); int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev); @@ -28,7 +28,7 @@ struct iopf_queue *iopf_queue_alloc(const char *name); void iopf_queue_free(struct iopf_queue *queue); int iopf_queue_discard_partial(struct iopf_queue *queue); -#else /* CONFIG_IOMMU_SVA_LIB */ +#else /* CONFIG_IOMMU_SVA */ static inline int iommu_queue_iopf(struct iommu_fault *fault, void *cookie) { return -ENODEV; @@ -64,5 +64,5 @@ static inline int iopf_queue_discard_partial(struct iopf_queue *queue) { return -ENODEV; } -#endif /* CONFIG_IOMMU_SVA_LIB */ +#endif /* CONFIG_IOMMU_SVA */ #endif /* _IOMMU_SVA_LIB_H */ -- GitLab From 7a853c2d5951419fdf3c1c9d2b6f5a38f6a6857d Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:45 -0800 Subject: [PATCH 0374/1586] mm: Change CONFIG option for mm->pasid field This currently depends on CONFIG_IOMMU_SUPPORT. 
But it is only needed when CONFIG_IOMMU_SVA option is enabled. Change the CONFIG guards around definition and initialization of mm->pasid field. Suggested-by: Jacob Pan Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Thomas Gleixner Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20220207230254.3342514-3-fenghua.yu@intel.com --- include/linux/mm_types.h | 2 +- kernel/fork.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 5140e5feb4866..c5cbfd7915adb 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -631,7 +631,7 @@ struct mm_struct { #endif struct work_struct async_put_work; -#ifdef CONFIG_IOMMU_SUPPORT +#ifdef CONFIG_IOMMU_SVA u32 pasid; #endif } __randomize_layout; diff --git a/kernel/fork.c b/kernel/fork.c index d75a528f7b219..6ee7551d3bd25 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1021,7 +1021,7 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p) static void mm_init_pasid(struct mm_struct *mm) { -#ifdef CONFIG_IOMMU_SUPPORT +#ifdef CONFIG_IOMMU_SVA mm->pasid = INIT_PASID; #endif } -- GitLab From bd265aadd1b7eabd4bf381dc4b0bc48ebd569e88 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 1 Dec 2021 06:27:59 -0800 Subject: [PATCH 0375/1586] MAINTAINERS: Add Frederic and Neeraj to their RCU files Adding Frederic as an RCU maintainer for kernel/rcu/tree_nocb.h given his work with offloading and de-offloading callbacks from CPUs. Also adding Neeraj for kernel/rcu/tasks.h given his focused work on RCU Tasks Trace. As in I am reasonably certain that each understands the full contents of the corresponding file. Cc: Neeraj Upadhyay Signed-off-by: Paul E. 
McKenney Acked-by: Frederic Weisbecker --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index ea3e6c9143848..5ad49de532b1a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16298,6 +16298,8 @@ F: tools/testing/selftests/resctrl/ READ-COPY UPDATE (RCU) M: "Paul E. McKenney" +M: Frederic Weisbecker (kernel/rcu/tree_nocb.h) +M: Neeraj Upadhyay (kernel/rcu/tasks.h) M: Josh Triplett R: Steven Rostedt R: Mathieu Desnoyers -- GitLab From 150154aae4311e7e6458903baecdc8fffe981ed3 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Wed, 1 Dec 2021 10:20:53 +0100 Subject: [PATCH 0376/1586] rcu: Fix description of kvfree_rcu() The kvfree_rcu() header comment's description of the "ptr" parameter is unclear, therefore rephrase it to make it clear that it is a pointer to the memory to eventually be passed to kvfree(). Reported-by: Steven Rostedt Signed-off-by: Uladzislau Rezki (Sony) Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 88b42eb464068..9d7df8d36af07 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -924,7 +924,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * * kvfree_rcu(ptr); * - * where @ptr is a pointer to kvfree(). + * where @ptr is the pointer to be freed by kvfree(). * * Please note, head-less way of freeing is permitted to * use from a context that has to follow might_sleep() -- GitLab From 218b957a6959a2fb5b3967fc824072bb89ac2611 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 8 Dec 2021 23:41:53 +0000 Subject: [PATCH 0377/1586] rcu: Add mutex for rcu boost kthread spawning and affinity setting As we handle parallel CPU bringup, we will need to take care to avoid spawning multiple boost threads, or race conditions when setting their affinity. Spotted by Paul McKenney. 
Signed-off-by: David Woodhouse Reviewed-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 1 + kernel/rcu/tree.h | 3 +++ kernel/rcu/tree_plugin.h | 10 ++++++++-- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index a4c25a6283b0b..d1d1a8c51223b 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -4570,6 +4570,7 @@ static void __init rcu_init_one(void) init_waitqueue_head(&rnp->exp_wq[2]); init_waitqueue_head(&rnp->exp_wq[3]); spin_lock_init(&rnp->exp_lock); + mutex_init(&rnp->boost_kthread_mutex); } } diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 486fc901bd085..3b8b60de07c38 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -110,6 +110,9 @@ struct rcu_node { /* side effect, not as a lock. */ unsigned long boost_time; /* When to start boosting (jiffies). */ + struct mutex boost_kthread_mutex; + /* Exclusion for thread spawning and affinity */ + /* manipulation. */ struct task_struct *boost_kthread_task; /* kthread that takes care of priority */ /* boosting for this rcu_node structure. */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index c5b45c2f68a15..07845dcd33c5e 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1172,15 +1172,16 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp) struct sched_param sp; struct task_struct *t; + mutex_lock(&rnp->boost_kthread_mutex); if (rnp->boost_kthread_task || !rcu_scheduler_fully_active) - return; + goto out; rcu_state.boost = 1; t = kthread_create(rcu_boost_kthread, (void *)rnp, "rcub/%d", rnp_index); if (WARN_ON_ONCE(IS_ERR(t))) - return; + goto out; raw_spin_lock_irqsave_rcu_node(rnp, flags); rnp->boost_kthread_task = t; @@ -1188,6 +1189,9 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp) sp.sched_priority = kthread_prio; sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. 
*/ + + out: + mutex_unlock(&rnp->boost_kthread_mutex); } /* @@ -1210,6 +1214,7 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) return; if (!zalloc_cpumask_var(&cm, GFP_KERNEL)) return; + mutex_lock(&rnp->boost_kthread_mutex); for_each_leaf_node_possible_cpu(rnp, cpu) if ((mask & leaf_node_cpu_bit(rnp, cpu)) && cpu != outgoingcpu) @@ -1218,6 +1223,7 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) if (cpumask_weight(cm) == 0) cpumask_copy(cm, housekeeping_cpumask(HK_FLAG_RCU)); set_cpus_allowed_ptr(t, cm); + mutex_unlock(&rnp->boost_kthread_mutex); free_cpumask_var(cm); } -- GitLab From 1fe09ebe7a9c9907f516779fbe4954165dd01529 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 18 Dec 2021 09:30:33 -0800 Subject: [PATCH 0378/1586] rcu: Inline __call_rcu() into call_rcu() Because __call_rcu() is invoked only by call_rcu(), this commit inlines the former into the latter. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 91 ++++++++++++++++++++++------------------------- 1 file changed, 42 insertions(+), 49 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index d1d1a8c51223b..f1bb7ccc00847 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2995,9 +2995,47 @@ static void check_cb_ovld(struct rcu_data *rdp) raw_spin_unlock_rcu_node(rnp); } -/* Helper function for call_rcu() and friends. */ -static void -__call_rcu(struct rcu_head *head, rcu_callback_t func) +/** + * call_rcu() - Queue an RCU callback for invocation after a grace period. + * @head: structure to be used for queueing the RCU updates. + * @func: actual callback function to be invoked after the grace period + * + * The callback function will be invoked some time after a full grace + * period elapses, in other words after all pre-existing RCU read-side + * critical sections have completed. 
However, the callback function + * might well execute concurrently with RCU read-side critical sections + * that started after call_rcu() was invoked. + * + * RCU read-side critical sections are delimited by rcu_read_lock() + * and rcu_read_unlock(), and may be nested. In addition, but only in + * v5.0 and later, regions of code across which interrupts, preemption, + * or softirqs have been disabled also serve as RCU read-side critical + * sections. This includes hardware interrupt handlers, softirq handlers, + * and NMI handlers. + * + * Note that all CPUs must agree that the grace period extended beyond + * all pre-existing RCU read-side critical section. On systems with more + * than one CPU, this means that when "func()" is invoked, each CPU is + * guaranteed to have executed a full memory barrier since the end of its + * last RCU read-side critical section whose beginning preceded the call + * to call_rcu(). It also means that each CPU executing an RCU read-side + * critical section that continues beyond the start of "func()" must have + * executed a memory barrier after the call_rcu() but before the beginning + * of that RCU read-side critical section. Note that these guarantees + * include CPUs that are offline, idle, or executing in user mode, as + * well as CPUs that are executing in the kernel. + * + * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the + * resulting RCU callback function "func()", then both CPU A and CPU B are + * guaranteed to execute a full memory barrier during the time interval + * between the call to call_rcu() and the invocation of "func()" -- even + * if CPU A and CPU B are the same CPU (but again only if the system has + * more than one CPU). + * + * Implementation of these memory-ordering guarantees is described here: + * Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst. 
+ */ +void call_rcu(struct rcu_head *head, rcu_callback_t func) { static atomic_t doublefrees; unsigned long flags; @@ -3011,7 +3049,7 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func) /* * Probable double call_rcu(), so leak the callback. * Use rcu:rcu_callback trace event to find the previous - * time callback was passed to __call_rcu(). + * time callback was passed to call_rcu(). */ if (atomic_inc_return(&doublefrees) < 4) { pr_err("%s(): Double-freed CB %p->%pS()!!! ", __func__, head, head->func); @@ -3060,51 +3098,6 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func) local_irq_restore(flags); } } - -/** - * call_rcu() - Queue an RCU callback for invocation after a grace period. - * @head: structure to be used for queueing the RCU updates. - * @func: actual callback function to be invoked after the grace period - * - * The callback function will be invoked some time after a full grace - * period elapses, in other words after all pre-existing RCU read-side - * critical sections have completed. However, the callback function - * might well execute concurrently with RCU read-side critical sections - * that started after call_rcu() was invoked. - * - * RCU read-side critical sections are delimited by rcu_read_lock() - * and rcu_read_unlock(), and may be nested. In addition, but only in - * v5.0 and later, regions of code across which interrupts, preemption, - * or softirqs have been disabled also serve as RCU read-side critical - * sections. This includes hardware interrupt handlers, softirq handlers, - * and NMI handlers. - * - * Note that all CPUs must agree that the grace period extended beyond - * all pre-existing RCU read-side critical section. On systems with more - * than one CPU, this means that when "func()" is invoked, each CPU is - * guaranteed to have executed a full memory barrier since the end of its - * last RCU read-side critical section whose beginning preceded the call - * to call_rcu(). 
It also means that each CPU executing an RCU read-side - * critical section that continues beyond the start of "func()" must have - * executed a memory barrier after the call_rcu() but before the beginning - * of that RCU read-side critical section. Note that these guarantees - * include CPUs that are offline, idle, or executing in user mode, as - * well as CPUs that are executing in the kernel. - * - * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the - * resulting RCU callback function "func()", then both CPU A and CPU B are - * guaranteed to execute a full memory barrier during the time interval - * between the call to call_rcu() and the invocation of "func()" -- even - * if CPU A and CPU B are the same CPU (but again only if the system has - * more than one CPU). - * - * Implementation of these memory-ordering guarantees is described here: - * Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst. - */ -void call_rcu(struct rcu_head *head, rcu_callback_t func) -{ - __call_rcu(head, func); -} EXPORT_SYMBOL_GPL(call_rcu); -- GitLab From d818cc76e2b4d5f6cebf8c7ce1160d652d7e572b Mon Sep 17 00:00:00 2001 From: Zqiang Date: Sun, 26 Dec 2021 08:52:04 +0800 Subject: [PATCH 0379/1586] kasan: Record work creation stack trace with interrupts enabled Recording the work creation stack trace for KASAN reports in call_rcu() is expensive, due to unwinding the stack, but also due to acquiring depot_lock inside stackdepot (which may be contended). Because calling kasan_record_aux_stack_noalloc() does not require interrupts to already be disabled, this may unnecessarily extend the time with interrupts disabled. Therefore, move calling kasan_record_aux_stack() before the section with interrupts disabled. Acked-by: Marco Elver Signed-off-by: Zqiang Signed-off-by: Paul E. 
McKenney --- kernel/rcu/tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index f1bb7ccc00847..ca8d7dd026eeb 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3060,8 +3060,8 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func) } head->func = func; head->next = NULL; - local_irq_save(flags); kasan_record_aux_stack_noalloc(head); + local_irq_save(flags); rdp = this_cpu_ptr(&rcu_data); /* Add the callback to our list. */ -- GitLab From c09929031018913b5783872a8b8cdddef4a543c7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 4 Jan 2022 10:34:34 -0800 Subject: [PATCH 0380/1586] rcu: Mark writes to the rcu_segcblist structure's ->flags field KCSAN reports data races between the rcu_segcblist_clear_flags() and rcu_segcblist_set_flags() functions, though misreporting the latter as a call to rcu_segcblist_is_enabled() from call_rcu(). This commit converts the updates of this field to WRITE_ONCE(), relying on the resulting unmarked reads to continue to detect buggy concurrent writes to this field. Reported-by: Zhouyi Zhou Signed-off-by: Paul E. 
McKenney Cc: Frederic Weisbecker --- kernel/rcu/rcu_segcblist.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h index e373fbe44da5e..431cee212467d 100644 --- a/kernel/rcu/rcu_segcblist.h +++ b/kernel/rcu/rcu_segcblist.h @@ -56,13 +56,13 @@ static inline long rcu_segcblist_n_cbs(struct rcu_segcblist *rsclp) static inline void rcu_segcblist_set_flags(struct rcu_segcblist *rsclp, int flags) { - rsclp->flags |= flags; + WRITE_ONCE(rsclp->flags, rsclp->flags | flags); } static inline void rcu_segcblist_clear_flags(struct rcu_segcblist *rsclp, int flags) { - rsclp->flags &= ~flags; + WRITE_ONCE(rsclp->flags, rsclp->flags & ~flags); } static inline bool rcu_segcblist_test_flags(struct rcu_segcblist *rsclp, -- GitLab From 58d4292bd037b01fbb940a5170817f7d40caa9d5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 14 Jan 2022 16:07:28 -0800 Subject: [PATCH 0381/1586] rcu: Uninline multi-use function: finish_rcuwait() This is a rarely used function, so uninlining its 3 instructions is probably a win or a wash - but the main motivation is to make <linux/rcuwait.h> independent of task_struct details. Signed-off-by: Ingo Molnar Signed-off-by: Paul E.
McKenney --- include/linux/rcuwait.h | 6 +----- kernel/rcu/update.c | 7 +++++++ 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/include/linux/rcuwait.h b/include/linux/rcuwait.h index 61c56cca95c43..8052d34da7826 100644 --- a/include/linux/rcuwait.h +++ b/include/linux/rcuwait.h @@ -47,11 +47,7 @@ static inline void prepare_to_rcuwait(struct rcuwait *w) rcu_assign_pointer(w->task, current); } -static inline void finish_rcuwait(struct rcuwait *w) -{ - rcu_assign_pointer(w->task, NULL); - __set_current_state(TASK_RUNNING); -} +extern void finish_rcuwait(struct rcuwait *w); #define rcuwait_wait_event(w, condition, state) \ ({ \ diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 156892c22bb56..180ff9c41fa87 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -407,6 +407,13 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array, } EXPORT_SYMBOL_GPL(__wait_rcu_gp); +void finish_rcuwait(struct rcuwait *w) +{ + rcu_assign_pointer(w->task, NULL); + __set_current_state(TASK_RUNNING); +} +EXPORT_SYMBOL_GPL(finish_rcuwait); + #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD void init_rcu_head(struct rcu_head *head) { -- GitLab From e6339d3b443c436c3b8f45eefec2212a8c07065d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 14 Jan 2022 16:16:55 -0800 Subject: [PATCH 0382/1586] rcu: Remove __read_mostly annotations from rcu_scheduler_active externs Remove the __read_mostly attributes from the rcu_scheduler_active extern declarations, because these attributes are ignored for prototypes and we'd have to include the full header to gain this functionally pointless attribute defined. Signed-off-by: Ingo Molnar Signed-off-by: Paul E. 
McKenney --- include/linux/rcupdate.h | 2 +- include/linux/rcutree.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 9d7df8d36af07..e7c39c200e2b0 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -84,7 +84,7 @@ static inline int rcu_preempt_depth(void) /* Internal to kernel */ void rcu_init(void); -extern int rcu_scheduler_active __read_mostly; +extern int rcu_scheduler_active; void rcu_sched_clock_irq(int user); void rcu_report_dead(unsigned int cpu); void rcutree_migrate_callbacks(int cpu); diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 53209d6694001..76665db179fa1 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -62,7 +62,7 @@ static inline void rcu_irq_exit_check_preempt(void) { } void exit_rcu(void); void rcu_scheduler_starting(void); -extern int rcu_scheduler_active __read_mostly; +extern int rcu_scheduler_active; void rcu_end_inkernel_boot(void); bool rcu_inkernel_boot_has_ended(void); bool rcu_is_watching(void); -- GitLab From 6a2c1d450a6a328027280a854019c55de989e14e Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sun, 23 Jan 2022 10:38:53 -0800 Subject: [PATCH 0383/1586] rcu: Replace cpumask_weight with cpumask_empty where appropriate In some places, RCU code calls cpumask_weight() to check if any bit of a given cpumask is set. We can do it more efficiently with cpumask_empty() because cpumask_empty() stops traversing the cpumask as soon as it finds first set bit, while cpumask_weight() counts all bits unconditionally. Signed-off-by: Yury Norov Acked-by: Frederic Weisbecker Signed-off-by: Paul E. 
McKenney --- kernel/rcu/tree_nocb.h | 4 ++-- kernel/rcu/tree_plugin.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index eeafb546a7a09..f83c7b1d61103 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -1169,7 +1169,7 @@ void __init rcu_init_nohz(void) struct rcu_data *rdp; #if defined(CONFIG_NO_HZ_FULL) - if (tick_nohz_full_running && cpumask_weight(tick_nohz_full_mask)) + if (tick_nohz_full_running && !cpumask_empty(tick_nohz_full_mask)) need_rcu_nocb_mask = true; #endif /* #if defined(CONFIG_NO_HZ_FULL) */ @@ -1348,7 +1348,7 @@ static void __init rcu_organize_nocb_kthreads(void) */ void rcu_bind_current_to_nocb(void) { - if (cpumask_available(rcu_nocb_mask) && cpumask_weight(rcu_nocb_mask)) + if (cpumask_available(rcu_nocb_mask) && !cpumask_empty(rcu_nocb_mask)) WARN_ON(sched_setaffinity(current->pid, rcu_nocb_mask)); } EXPORT_SYMBOL_GPL(rcu_bind_current_to_nocb); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 07845dcd33c5e..efd0c87d2ffae 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1220,7 +1220,7 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) cpu != outgoingcpu) cpumask_set_cpu(cpu, cm); cpumask_and(cm, cm, housekeeping_cpumask(HK_FLAG_RCU)); - if (cpumask_weight(cm) == 0) + if (cpumask_empty(cm)) cpumask_copy(cm, housekeeping_cpumask(HK_FLAG_RCU)); set_cpus_allowed_ptr(t, cm); mutex_unlock(&rnp->boost_kthread_mutex); -- GitLab From 631e3893c35e116d16b81b41bea6ba2143db4fa4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 1 Feb 2022 20:18:10 +0100 Subject: [PATCH 0384/1586] ACPI: PM: Print additional debug message in acpi_s2idle_wake() Make acpi_s2idle_wake() print an additional debug message when the SCI is going to be rearmed for system wakeup to help diagnose wakeup-related issues. Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/sleep.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index d4fbea91ab6b8..d068ff42fce49 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -758,6 +758,8 @@ bool acpi_s2idle_wake(void) return true; } + pm_pr_dbg("Rearming ACPI SCI for wakeup\n"); + pm_wakeup_clear(acpi_sci_irq); rearm_wake_irq(acpi_sci_irq); } -- GitLab From 977dc3082285206e7b1fcbc4496671194cfb7980 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 1 Feb 2022 20:18:56 +0100 Subject: [PATCH 0385/1586] ACPI: EC / PM: Print additional debug message in acpi_ec_dispatch_gpe() Make acpi_ec_dispatch_gpe() print an additional debug message after seeing the EC GPE status bit set to help diagnose wakeup-related issues. Signed-off-by: Rafael J. Wysocki --- drivers/acpi/ec.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 46710380a4029..f039565d59ad7 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -2081,8 +2081,11 @@ bool acpi_ec_dispatch_gpe(void) */ spin_lock_irq(&first_ec->lock); - if (acpi_ec_gpe_status_set(first_ec)) + if (acpi_ec_gpe_status_set(first_ec)) { + pm_pr_dbg("ACPI EC GPE status set\n"); + work_in_progress = advance_transaction(first_ec, false); + } spin_unlock_irq(&first_ec->lock); -- GitLab From 7a5fbc9bcba5325a45297a4ba00091f39a63a1ed Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:46 -0800 Subject: [PATCH 0386/1586] iommu/ioasid: Introduce a helper to check for valid PASIDs Define a pasid_valid() helper to check if a given PASID is valid. [ bp: Massage commit message. 
] Suggested-by: Ashok Raj Suggested-by: Jacob Pan Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Thomas Gleixner Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20220207230254.3342514-4-fenghua.yu@intel.com --- include/linux/ioasid.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/linux/ioasid.h b/include/linux/ioasid.h index e9dacd4b9f6bb..2237f64dbaae4 100644 --- a/include/linux/ioasid.h +++ b/include/linux/ioasid.h @@ -41,6 +41,10 @@ void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, int ioasid_register_allocator(struct ioasid_allocator_ops *allocator); void ioasid_unregister_allocator(struct ioasid_allocator_ops *allocator); int ioasid_set_data(ioasid_t ioasid, void *data); +static inline bool pasid_valid(ioasid_t ioasid) +{ + return ioasid != INVALID_IOASID; +} #else /* !CONFIG_IOASID */ static inline ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, @@ -78,5 +82,10 @@ static inline int ioasid_set_data(ioasid_t ioasid, void *data) return -ENOTSUPP; } +static inline bool pasid_valid(ioasid_t ioasid) +{ + return false; +} + #endif /* CONFIG_IOASID */ #endif /* __LINUX_IOASID_H */ -- GitLab From a6cbd44093ef305b02ad5f80ed54abf0148a696c Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:47 -0800 Subject: [PATCH 0387/1586] kernel/fork: Initialize mm's PASID A new mm doesn't have a PASID yet when it's created. Initialize the mm's PASID on fork() or for init_mm to INVALID_IOASID (-1). INIT_PASID (0) is reserved for kernel legacy DMA PASID. It cannot be allocated to a user process. Initializing the process's PASID to 0 may cause confusion that's why the process uses the reserved kernel legacy DMA PASID. Initializing the PASID to INVALID_IOASID (-1) explicitly tells the process doesn't have a valid PASID yet. 
Even though the only user of mm_pasid_init() is in fork.c, define it in <linux/sched/mm.h> as the first of three mm/pasid life cycle functions (init/set/drop) to keep these all together. Suggested-by: Dave Hansen Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220207230254.3342514-5-fenghua.yu@intel.com --- include/linux/sched/mm.h | 10 ++++++++++ kernel/fork.c | 10 ++-------- mm/init-mm.c | 4 ++++ 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index aa5f09ca5bcf4..c74d1edbac2f0 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -8,6 +8,7 @@ #include #include #include +#include /* * Routines for handling mm_structs @@ -433,4 +434,13 @@ static inline void membarrier_update_current_mm(struct mm_struct *next_mm) } #endif +#ifdef CONFIG_IOMMU_SVA +static inline void mm_pasid_init(struct mm_struct *mm) +{ + mm->pasid = INVALID_IOASID; +} +#else +static inline void mm_pasid_init(struct mm_struct *mm) {} +#endif + #endif /* _LINUX_SCHED_MM_H */ diff --git a/kernel/fork.c b/kernel/fork.c index 6ee7551d3bd25..deacd2c17a7f1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -97,6 +97,7 @@ #include #include #include +#include #include #include @@ -1019,13 +1020,6 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p) #endif } -static void mm_init_pasid(struct mm_struct *mm) -{ -#ifdef CONFIG_IOMMU_SVA - mm->pasid = INIT_PASID; -#endif -} - static void mm_init_uprobes_state(struct mm_struct *mm) { #ifdef CONFIG_UPROBES @@ -1054,7 +1048,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, mm_init_cpumask(mm); mm_init_aio(mm); mm_init_owner(mm, p); - mm_init_pasid(mm); + mm_pasid_init(mm); RCU_INIT_POINTER(mm->exe_file, NULL); mmu_notifier_subscriptions_init(mm); init_tlb_flush_pending(mm); diff --git a/mm/init-mm.c b/mm/init-mm.c index b4a6f38fb51dd..fbe7844d0912f
100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -10,6 +10,7 @@ #include #include +#include #include #ifndef INIT_MM_CONTEXT @@ -38,6 +39,9 @@ struct mm_struct init_mm = { .mmlist = LIST_HEAD_INIT(init_mm.mmlist), .user_ns = &init_user_ns, .cpu_bitmap = CPU_BITS_NONE, +#ifdef CONFIG_IOMMU_SVA + .pasid = INVALID_IOASID, +#endif INIT_MM_CONTEXT(init_mm) }; -- GitLab From 8cb37a5974a48569aab8a1736d21399fddbdbdb2 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 31 Jan 2022 10:05:20 +0100 Subject: [PATCH 0388/1586] stack: Introduce CONFIG_RANDOMIZE_KSTACK_OFFSET The randomize_kstack_offset feature is unconditionally compiled in when the architecture supports it. To add constraints on compiler versions, we require a dedicated Kconfig variable. Therefore, introduce RANDOMIZE_KSTACK_OFFSET. Furthermore, this option is now also configurable by EXPERT kernels: while the feature is supposed to have zero performance overhead when disabled, due to its use of static branches, there are few cases where giving a distribution the option to disable the feature entirely makes sense. For example, in very resource constrained environments, which would never enable the feature to begin with, in which case the additional kernel code size increase would be redundant. Signed-off-by: Marco Elver Reviewed-by: Nathan Chancellor Acked-by: Peter Zijlstra (Intel) Acked-by: Kees Cook Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220131090521.1947110-1-elver@google.com --- arch/Kconfig | 23 ++++++++++++++++++----- include/linux/randomize_kstack.h | 5 +++++ init/main.c | 2 +- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 678a80713b213..2cde48d9b77c4 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1159,16 +1159,29 @@ config HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET to the compiler, so it will attempt to add canary checks regardless of the static branch state. 
-config RANDOMIZE_KSTACK_OFFSET_DEFAULT - bool "Randomize kernel stack offset on syscall entry" +config RANDOMIZE_KSTACK_OFFSET + bool "Support for randomizing kernel stack offset on syscall entry" if EXPERT + default y depends on HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET help The kernel stack offset can be randomized (after pt_regs) by roughly 5 bits of entropy, frustrating memory corruption attacks that depend on stack address determinism or - cross-syscall address exposures. This feature is controlled - by kernel boot param "randomize_kstack_offset=on/off", and this - config chooses the default boot state. + cross-syscall address exposures. + + The feature is controlled via the "randomize_kstack_offset=on/off" + kernel boot param, and if turned off has zero overhead due to its use + of static branches (see JUMP_LABEL). + + If unsure, say Y. + +config RANDOMIZE_KSTACK_OFFSET_DEFAULT + bool "Default state of kernel stack offset randomization" + depends on RANDOMIZE_KSTACK_OFFSET + help + Kernel stack offset randomization is controlled by kernel boot param + "randomize_kstack_offset=on/off", and this config chooses the default + boot state. 
config ARCH_OPTIONAL_KERNEL_RWX def_bool n diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h index bebc911161b6f..91f1b990a3c35 100644 --- a/include/linux/randomize_kstack.h +++ b/include/linux/randomize_kstack.h @@ -2,6 +2,7 @@ #ifndef _LINUX_RANDOMIZE_KSTACK_H #define _LINUX_RANDOMIZE_KSTACK_H +#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET #include #include #include @@ -50,5 +51,9 @@ void *__builtin_alloca(size_t size); raw_cpu_write(kstack_offset, offset); \ } \ } while (0) +#else /* CONFIG_RANDOMIZE_KSTACK_OFFSET */ +#define add_random_kstack_offset() do { } while (0) +#define choose_random_kstack_offset(rand) do { } while (0) +#endif /* CONFIG_RANDOMIZE_KSTACK_OFFSET */ #endif diff --git a/init/main.c b/init/main.c index 65fa2e41a9c09..560f45c27ffe8 100644 --- a/init/main.c +++ b/init/main.c @@ -853,7 +853,7 @@ static void __init mm_init(void) pti_init(); } -#ifdef CONFIG_HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET +#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET DEFINE_STATIC_KEY_MAYBE_RO(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, randomize_kstack_offset); DEFINE_PER_CPU(u32, kstack_offset); -- GitLab From efa90c11f62e6b7252fb75efe2787056872a627c Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 31 Jan 2022 10:05:21 +0100 Subject: [PATCH 0389/1586] stack: Constrain and fix stack offset randomization with Clang builds All supported versions of Clang perform auto-init of __builtin_alloca() when stack auto-init is on (CONFIG_INIT_STACK_ALL_{ZERO,PATTERN}). add_random_kstack_offset() uses __builtin_alloca() to add a stack offset. This means, when CONFIG_INIT_STACK_ALL_{ZERO,PATTERN} is enabled, add_random_kstack_offset() will auto-init that unused portion of the stack used to add an offset. There are several problems with this: 1. These offsets can be as large as 1023 bytes. Performing memset() on them isn't exactly cheap, and this is done on every syscall entry. 2. 
Architectures adding add_random_kstack_offset() to syscall entry implemented in C require them to be 'noinstr' (e.g. see x86 and s390). The potential problem here is that a call to memset may occur, which is not noinstr. A x86_64 defconfig kernel with Clang 11 and CONFIG_VMLINUX_VALIDATION shows: | vmlinux.o: warning: objtool: do_syscall_64()+0x9d: call to memset() leaves .noinstr.text section | vmlinux.o: warning: objtool: do_int80_syscall_32()+0xab: call to memset() leaves .noinstr.text section | vmlinux.o: warning: objtool: __do_fast_syscall_32()+0xe2: call to memset() leaves .noinstr.text section | vmlinux.o: warning: objtool: fixup_bad_iret()+0x2f: call to memset() leaves .noinstr.text section Clang 14 (unreleased) will introduce a way to skip alloca initialization via __builtin_alloca_uninitialized() (https://reviews.llvm.org/D115440). Constrain RANDOMIZE_KSTACK_OFFSET to only be enabled if no stack auto-init is enabled, the compiler is GCC, or Clang is version 14+. Use __builtin_alloca_uninitialized() if the compiler provides it, as is done by Clang 14. 
Link: https://lkml.kernel.org/r/YbHTKUjEejZCLyhX@elver.google.com Fixes: 39218ff4c625 ("stack: Optionally randomize kernel stack offset each syscall") Signed-off-by: Marco Elver Reviewed-by: Nathan Chancellor Acked-by: Kees Cook Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220131090521.1947110-2-elver@google.com --- arch/Kconfig | 1 + include/linux/randomize_kstack.h | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 2cde48d9b77c4..c5b50bfe31c1a 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1163,6 +1163,7 @@ config RANDOMIZE_KSTACK_OFFSET bool "Support for randomizing kernel stack offset on syscall entry" if EXPERT default y depends on HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET + depends on INIT_STACK_NONE || !CC_IS_CLANG || CLANG_VERSION >= 140000 help The kernel stack offset can be randomized (after pt_regs) by roughly 5 bits of entropy, frustrating memory corruption diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h index 91f1b990a3c35..1468caf001c05 100644 --- a/include/linux/randomize_kstack.h +++ b/include/linux/randomize_kstack.h @@ -17,8 +17,20 @@ DECLARE_PER_CPU(u32, kstack_offset); * alignment. Also, since this use is being explicitly masked to a max of * 10 bits, stack-clash style attacks are unlikely. For more details see * "VLAs" in Documentation/process/deprecated.rst + * + * The normal __builtin_alloca() is initialized with INIT_STACK_ALL (currently + * only with Clang and not GCC). Initializing the unused area on each syscall + * entry is expensive, and generating an implicit call to memset() may also be + * problematic (such as in noinstr functions). Therefore, if the compiler + * supports it (which it should if it initializes allocas), always use the + * "uninitialized" variant of the builtin. 
*/ -void *__builtin_alloca(size_t size); +#if __has_builtin(__builtin_alloca_uninitialized) +#define __kstack_alloca __builtin_alloca_uninitialized +#else +#define __kstack_alloca __builtin_alloca +#endif + /* * Use, at most, 10 bits of entropy. We explicitly cap this to keep the * "VLA" from being unbounded (see above). 10 bits leaves enough room for @@ -37,7 +49,7 @@ void *__builtin_alloca(size_t size); if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \ &randomize_kstack_offset)) { \ u32 offset = raw_cpu_read(kstack_offset); \ - u8 *ptr = __builtin_alloca(KSTACK_OFFSET_MAX(offset)); \ + u8 *ptr = __kstack_alloca(KSTACK_OFFSET_MAX(offset)); \ /* Keep allocation even after "ptr" loses scope. */ \ asm volatile("" :: "r"(ptr) : "memory"); \ } \ -- GitLab From 9aa60f3cbafb0facc15a6114df7616a1bf23a88d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 4 Feb 2022 18:40:20 +0100 Subject: [PATCH 0390/1586] ACPI: EC: Do not return result from advance_transaction() Notice that if the event state is EC_EVENT_READY, the event handling work cannot be pending, so it is not necessary to check the return value of queue_work() in acpi_ec_submit_event(). Moreover, whether or not there is any EC work pending at the moment can always be checked by looking at the events_in_progress and queries_in_progress counters, so acpi_ec_submit_event() and consequently advance_transaction() need not return results. Accordingly, make acpi_ec_dispatch_gpe() always use the counters mentioned above (for first_ec) to check if there is any pending EC work to flush and turn both acpi_ec_submit_event() and advance_transaction() into void functions (again, because they were void functions in the past). While at it, add a clarifying comment about the acpi_ec_mask_events() call in advance_transaction(). Signed-off-by: Rafael J.
Wysocki --- drivers/acpi/ec.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index f039565d59ad7..e202c0c370b1a 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -168,7 +168,7 @@ struct acpi_ec_query { }; static int acpi_ec_submit_query(struct acpi_ec *ec); -static bool advance_transaction(struct acpi_ec *ec, bool interrupt); +static void advance_transaction(struct acpi_ec *ec, bool interrupt); static void acpi_ec_event_handler(struct work_struct *work); struct acpi_ec *first_ec; @@ -441,11 +441,15 @@ static bool acpi_ec_submit_flushable_request(struct acpi_ec *ec) return true; } -static bool acpi_ec_submit_event(struct acpi_ec *ec) +static void acpi_ec_submit_event(struct acpi_ec *ec) { + /* + * It is safe to mask the events here, because acpi_ec_close_event() + * will run at least once after this. + */ acpi_ec_mask_events(ec); if (!acpi_ec_event_enabled(ec)) - return false; + return; if (ec->event_state == EC_EVENT_READY) { ec_dbg_evt("Command(%s) submitted/blocked", @@ -460,17 +464,11 @@ static bool acpi_ec_submit_event(struct acpi_ec *ec) * queue up the event work to start the same loop again. */ if (ec->events_to_process++ > 0) - return true; + return; ec->events_in_progress++; - return queue_work(ec_wq, &ec->work); + queue_work(ec_wq, &ec->work); } - - /* - * The event handling work has not been completed yet, so it needs to be - * flushed. - */ - return true; } static void acpi_ec_complete_event(struct acpi_ec *ec) @@ -655,11 +653,10 @@ static void acpi_ec_spurious_interrupt(struct acpi_ec *ec, struct transaction *t acpi_ec_mask_events(ec); } -static bool advance_transaction(struct acpi_ec *ec, bool interrupt) +static void advance_transaction(struct acpi_ec *ec, bool interrupt) { struct transaction *t = ec->curr; bool wakeup = false; - bool ret = false; u8 status; ec_dbg_stm("%s (%d)", interrupt ? 
"IRQ" : "TASK", smp_processor_id()); @@ -724,12 +721,10 @@ static bool advance_transaction(struct acpi_ec *ec, bool interrupt) out: if (status & ACPI_EC_FLAG_SCI) - ret = acpi_ec_submit_event(ec); + acpi_ec_submit_event(ec); if (wakeup && interrupt) wake_up(&ec->wait); - - return ret; } static void start_transaction(struct acpi_ec *ec) @@ -2051,6 +2046,11 @@ void acpi_ec_set_gpe_wake_mask(u8 action) acpi_set_gpe_wake_mask(NULL, first_ec->gpe, action); } +static bool acpi_ec_work_in_progress(struct acpi_ec *ec) +{ + return ec->events_in_progress + ec->queries_in_progress > 0; +} + bool acpi_ec_dispatch_gpe(void) { bool work_in_progress = false; @@ -2084,7 +2084,8 @@ bool acpi_ec_dispatch_gpe(void) if (acpi_ec_gpe_status_set(first_ec)) { pm_pr_dbg("ACPI EC GPE status set\n"); - work_in_progress = advance_transaction(first_ec, false); + advance_transaction(first_ec, false); + work_in_progress = acpi_ec_work_in_progress(first_ec); } spin_unlock_irq(&first_ec->lock); @@ -2102,8 +2103,7 @@ bool acpi_ec_dispatch_gpe(void) spin_lock_irq(&first_ec->lock); - work_in_progress = first_ec->events_in_progress + - first_ec->queries_in_progress > 0; + work_in_progress = acpi_ec_work_in_progress(first_ec); spin_unlock_irq(&first_ec->lock); } while (work_in_progress && !pm_wakeup_pending()); -- GitLab From 54b86141d71db2a16743cfa4a3417180d7feb24a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 4 Feb 2022 18:40:55 +0100 Subject: [PATCH 0391/1586] ACPI: EC: Reduce indentation level in acpi_ec_submit_event() The indentation level in acpi_ec_submit_event() can be reduced, so do that and while at it fix a typo in the comment affected by that change. No intentional functional impact. Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/ec.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index e202c0c370b1a..bba96df3b88a9 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -451,24 +451,25 @@ static void acpi_ec_submit_event(struct acpi_ec *ec) if (!acpi_ec_event_enabled(ec)) return; - if (ec->event_state == EC_EVENT_READY) { - ec_dbg_evt("Command(%s) submitted/blocked", - acpi_ec_cmd_string(ACPI_EC_COMMAND_QUERY)); + if (ec->event_state != EC_EVENT_READY) + return; - ec->event_state = EC_EVENT_IN_PROGRESS; - /* - * If events_to_process is greqter than 0 at this point, the - * while () loop in acpi_ec_event_handler() is still running - * and incrementing events_to_process will cause it to invoke - * acpi_ec_submit_query() once more, so it is not necessary to - * queue up the event work to start the same loop again. - */ - if (ec->events_to_process++ > 0) - return; + ec_dbg_evt("Command(%s) submitted/blocked", + acpi_ec_cmd_string(ACPI_EC_COMMAND_QUERY)); - ec->events_in_progress++; - queue_work(ec_wq, &ec->work); - } + ec->event_state = EC_EVENT_IN_PROGRESS; + /* + * If events_to_process is greater than 0 at this point, the while () + * loop in acpi_ec_event_handler() is still running and incrementing + * events_to_process will cause it to invoke acpi_ec_submit_query() once + * more, so it is not necessary to queue up the event work to start the + * same loop again. + */ + if (ec->events_to_process++ > 0) + return; + + ec->events_in_progress++; + queue_work(ec_wq, &ec->work); } static void acpi_ec_complete_event(struct acpi_ec *ec) -- GitLab From 13a62d0e13308d62426c38223a3b6f78878f2173 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Fri, 4 Feb 2022 18:43:14 +0100 Subject: [PATCH 0392/1586] ACPI: EC: Rearrange code in acpi_ec_submit_event() Rearrange acpi_ec_event_handler() so as to avoid releasing ec->lock and acquiring it again right away in the case when ec_event_clearing is not ACPI_EC_EVT_TIMING_EVENT. This also reduces the number of checks done by acpi_ec_event_handler() in that case. Signed-off-by: Rafael J. Wysocki --- drivers/acpi/ec.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index bba96df3b88a9..a1b871a418f87 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1238,6 +1238,7 @@ static void acpi_ec_event_handler(struct work_struct *work) acpi_ec_submit_query(ec); spin_lock_irq(&ec->lock); + ec->events_to_process--; } @@ -1246,27 +1247,30 @@ static void acpi_ec_event_handler(struct work_struct *work) * event handling work again regardless of whether or not the query * queued up above is processed successfully. */ - if (ec_event_clearing == ACPI_EC_EVT_TIMING_EVENT) + if (ec_event_clearing == ACPI_EC_EVT_TIMING_EVENT) { + bool guard_timeout; + acpi_ec_complete_event(ec); - else - acpi_ec_close_event(ec); - spin_unlock_irq(&ec->lock); + ec_dbg_evt("Event stopped"); + + spin_unlock_irq(&ec->lock); - ec_dbg_evt("Event stopped"); + guard_timeout = !!ec_guard(ec); - if (ec_event_clearing == ACPI_EC_EVT_TIMING_EVENT && ec_guard(ec)) { spin_lock_irq(&ec->lock); /* Take care of SCI_EVT unless someone else is doing that.
*/ - if (!ec->curr) + if (guard_timeout && !ec->curr) advance_transaction(ec, false); + } else { + acpi_ec_close_event(ec); - spin_unlock_irq(&ec->lock); + ec_dbg_evt("Event stopped"); } - spin_lock_irq(&ec->lock); ec->events_in_progress--; + spin_unlock_irq(&ec->lock); } -- GitLab From 6b22ce002f755e34e9a92ce5a41ffda0b729b892 Mon Sep 17 00:00:00 2001 From: Akira Yokosawa Date: Thu, 10 Feb 2022 10:26:53 +0900 Subject: [PATCH 0393/1586] docs: Makefile: Add -no-shell-escape option to LATEXOPTS It turns out that LaTeX enables \write18, which allows (some) shell commands to be executed from the document source, by default. This causes the often-seen warning during a pdfdocs build: restricted \write18 enabled That is a potential security problem and is entirely unnecessary; nothing in the kernel PDF docs build needs that capability. So disable \write18 explicitly. Signed-off-by: Akira Yokosawa Link: https://lore.kernel.org/r/519bd2d9-1bee-03e1-eeb4-d9883c18be0c@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/Makefile b/Documentation/Makefile index 9f4bd42cef189..64d44c1ecad31 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -26,7 +26,7 @@ SPHINX_CONF = conf.py PAPER = BUILDDIR = $(obj)/output PDFLATEX = xelatex -LATEXOPTS = -interaction=batchmode +LATEXOPTS = -interaction=batchmode -no-shell-escape ifeq ($(KBUILD_VERBOSE),0) SPHINXOPTS += "-q" -- GitLab From 701fac40384f07197b106136012804c3cae0b3de Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:48 -0800 Subject: [PATCH 0394/1586] iommu/sva: Assign a PASID to mm on PASID allocation and free it on mm exit PASIDs are process-wide. It was attempted to use refcounted PASIDs to free them when the last thread drops the refcount. This turned out to be complex and error prone.
Given the fact that the PASID space is 20 bits, which allows up to 1M processes to have a PASID associated concurrently, PASID resource exhaustion is not a realistic concern. Therefore, it was decided to simplify the approach and stick with lazy on demand PASID allocation, but drop the eager free approach and make an allocated PASID's lifetime bound to the lifetime of the process. Get rid of the refcounting mechanisms and replace/rename the interfaces to reflect this new approach. [ bp: Massage commit message. ] Suggested-by: Dave Hansen Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Lu Baolu Reviewed-by: Jacob Pan Reviewed-by: Thomas Gleixner Acked-by: Joerg Roedel Link: https://lore.kernel.org/r/20220207230254.3342514-6-fenghua.yu@intel.com --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 5 +-- drivers/iommu/intel/iommu.c | 4 +- drivers/iommu/intel/svm.c | 9 ----- drivers/iommu/ioasid.c | 39 ++----------------- drivers/iommu/iommu-sva-lib.c | 39 ++++++------------- drivers/iommu/iommu-sva-lib.h | 1 - include/linux/ioasid.h | 12 +----- include/linux/sched/mm.h | 16 ++++++++ kernel/fork.c | 1 + 9 files changed, 38 insertions(+), 88 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index a737ba5f727e6..22ddd05bbdcd0 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -340,14 +340,12 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm) bond->smmu_mn = arm_smmu_mmu_notifier_get(smmu_domain, mm); if (IS_ERR(bond->smmu_mn)) { ret = PTR_ERR(bond->smmu_mn); - goto err_free_pasid; + goto err_free_bond; } list_add(&bond->list, &master->bonds); return &bond->sva; -err_free_pasid: - iommu_sva_free_pasid(mm); err_free_bond: kfree(bond); return ERR_PTR(ret); @@ -377,7 +375,6 @@ void arm_smmu_sva_unbind(struct iommu_sva *handle) if (refcount_dec_and_test(&bond->refs)) { 
list_del(&bond->list); arm_smmu_mmu_notifier_put(bond->smmu_mn); - iommu_sva_free_pasid(bond->mm); kfree(bond); } mutex_unlock(&sva_lock); diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 92fea3fbbb114..ef03b2176bbd3 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4781,7 +4781,7 @@ attach_failed: link_failed: spin_unlock_irqrestore(&device_domain_lock, flags); if (list_empty(&domain->subdevices) && domain->default_pasid > 0) - ioasid_put(domain->default_pasid); + ioasid_free(domain->default_pasid); return ret; } @@ -4811,7 +4811,7 @@ static void aux_domain_remove_dev(struct dmar_domain *domain, spin_unlock_irqrestore(&device_domain_lock, flags); if (list_empty(&domain->subdevices) && domain->default_pasid > 0) - ioasid_put(domain->default_pasid); + ioasid_free(domain->default_pasid); } static int prepare_domain_attach_device(struct iommu_domain *domain, diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 5b5d69b04fcc8..51ac2096b3da7 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -514,11 +514,6 @@ static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm, return iommu_sva_alloc_pasid(mm, PASID_MIN, max_pasid - 1); } -static void intel_svm_free_pasid(struct mm_struct *mm) -{ - iommu_sva_free_pasid(mm); -} - static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, struct device *dev, struct mm_struct *mm, @@ -662,8 +657,6 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) kfree(svm); } } - /* Drop a PASID reference and free it if no reference. 
*/ - intel_svm_free_pasid(mm); } out: return ret; @@ -1047,8 +1040,6 @@ struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void } sva = intel_svm_bind_mm(iommu, dev, mm, flags); - if (IS_ERR_OR_NULL(sva)) - intel_svm_free_pasid(mm); mutex_unlock(&pasid_mutex); return sva; diff --git a/drivers/iommu/ioasid.c b/drivers/iommu/ioasid.c index 06fee7416816b..a786c034907c4 100644 --- a/drivers/iommu/ioasid.c +++ b/drivers/iommu/ioasid.c @@ -2,7 +2,7 @@ /* * I/O Address Space ID allocator. There is one global IOASID space, split into * subsets. Users create a subset with DECLARE_IOASID_SET, then allocate and - * free IOASIDs with ioasid_alloc and ioasid_put. + * free IOASIDs with ioasid_alloc() and ioasid_free(). */ #include #include @@ -15,7 +15,6 @@ struct ioasid_data { struct ioasid_set *set; void *private; struct rcu_head rcu; - refcount_t refs; }; /* @@ -315,7 +314,6 @@ ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max, data->set = set; data->private = private; - refcount_set(&data->refs, 1); /* * Custom allocator needs allocator data to perform platform specific @@ -348,35 +346,11 @@ exit_free: EXPORT_SYMBOL_GPL(ioasid_alloc); /** - * ioasid_get - obtain a reference to the IOASID - * @ioasid: the ID to get - */ -void ioasid_get(ioasid_t ioasid) -{ - struct ioasid_data *ioasid_data; - - spin_lock(&ioasid_allocator_lock); - ioasid_data = xa_load(&active_allocator->xa, ioasid); - if (ioasid_data) - refcount_inc(&ioasid_data->refs); - else - WARN_ON(1); - spin_unlock(&ioasid_allocator_lock); -} -EXPORT_SYMBOL_GPL(ioasid_get); - -/** - * ioasid_put - Release a reference to an ioasid + * ioasid_free - Free an ioasid * @ioasid: the ID to remove - * - * Put a reference to the IOASID, free it when the number of references drops to - * zero. - * - * Return: %true if the IOASID was freed, %false otherwise. 
*/ -bool ioasid_put(ioasid_t ioasid) +void ioasid_free(ioasid_t ioasid) { - bool free = false; struct ioasid_data *ioasid_data; spin_lock(&ioasid_allocator_lock); @@ -386,10 +360,6 @@ bool ioasid_put(ioasid_t ioasid) goto exit_unlock; } - free = refcount_dec_and_test(&ioasid_data->refs); - if (!free) - goto exit_unlock; - active_allocator->ops->free(ioasid, active_allocator->ops->pdata); /* Custom allocator needs additional steps to free the xa element */ if (active_allocator->flags & IOASID_ALLOCATOR_CUSTOM) { @@ -399,9 +369,8 @@ bool ioasid_put(ioasid_t ioasid) exit_unlock: spin_unlock(&ioasid_allocator_lock); - return free; } -EXPORT_SYMBOL_GPL(ioasid_put); +EXPORT_SYMBOL_GPL(ioasid_free); /** * ioasid_find - Find IOASID data diff --git a/drivers/iommu/iommu-sva-lib.c b/drivers/iommu/iommu-sva-lib.c index bd41405d34e95..1065061438960 100644 --- a/drivers/iommu/iommu-sva-lib.c +++ b/drivers/iommu/iommu-sva-lib.c @@ -18,8 +18,7 @@ static DECLARE_IOASID_SET(iommu_sva_pasid); * * Try to allocate a PASID for this mm, or take a reference to the existing one * provided it fits within the [@min, @max] range. On success the PASID is - * available in mm->pasid, and must be released with iommu_sva_free_pasid(). - * @min must be greater than 0, because 0 indicates an unused mm->pasid. + * available in mm->pasid and will be available for the lifetime of the mm. * * Returns 0 on success and < 0 on error. */ @@ -33,38 +32,24 @@ int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max) return -EINVAL; mutex_lock(&iommu_sva_lock); - if (mm->pasid) { - if (mm->pasid >= min && mm->pasid <= max) - ioasid_get(mm->pasid); - else + /* Is a PASID already associated with this mm? 
*/ + if (pasid_valid(mm->pasid)) { + if (mm->pasid < min || mm->pasid >= max) ret = -EOVERFLOW; - } else { - pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm); - if (pasid == INVALID_IOASID) - ret = -ENOMEM; - else - mm->pasid = pasid; + goto out; } + + pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm); + if (!pasid_valid(pasid)) + ret = -ENOMEM; + else + mm_pasid_set(mm, pasid); +out: mutex_unlock(&iommu_sva_lock); return ret; } EXPORT_SYMBOL_GPL(iommu_sva_alloc_pasid); -/** - * iommu_sva_free_pasid - Release the mm's PASID - * @mm: the mm - * - * Drop one reference to a PASID allocated with iommu_sva_alloc_pasid() - */ -void iommu_sva_free_pasid(struct mm_struct *mm) -{ - mutex_lock(&iommu_sva_lock); - if (ioasid_put(mm->pasid)) - mm->pasid = 0; - mutex_unlock(&iommu_sva_lock); -} -EXPORT_SYMBOL_GPL(iommu_sva_free_pasid); - /* ioasid_find getter() requires a void * argument */ static bool __mmget_not_zero(void *mm) { diff --git a/drivers/iommu/iommu-sva-lib.h b/drivers/iommu/iommu-sva-lib.h index 95dc3ebc19283..8909ea1094e3a 100644 --- a/drivers/iommu/iommu-sva-lib.h +++ b/drivers/iommu/iommu-sva-lib.h @@ -9,7 +9,6 @@ #include int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max); -void iommu_sva_free_pasid(struct mm_struct *mm); struct mm_struct *iommu_sva_find(ioasid_t pasid); /* I/O Page fault */ diff --git a/include/linux/ioasid.h b/include/linux/ioasid.h index 2237f64dbaae4..af1c9d62e6428 100644 --- a/include/linux/ioasid.h +++ b/include/linux/ioasid.h @@ -34,8 +34,7 @@ struct ioasid_allocator_ops { #if IS_ENABLED(CONFIG_IOASID) ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max, void *private); -void ioasid_get(ioasid_t ioasid); -bool ioasid_put(ioasid_t ioasid); +void ioasid_free(ioasid_t ioasid); void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, bool (*getter)(void *)); int ioasid_register_allocator(struct ioasid_allocator_ops *allocator); @@ -53,14 +52,7 @@ static inline ioasid_t 
ioasid_alloc(struct ioasid_set *set, ioasid_t min, return INVALID_IOASID; } -static inline void ioasid_get(ioasid_t ioasid) -{ -} - -static inline bool ioasid_put(ioasid_t ioasid) -{ - return false; -} +static inline void ioasid_free(ioasid_t ioasid) { } static inline void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, bool (*getter)(void *)) diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index c74d1edbac2f0..a80356e9dc69a 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -439,8 +439,24 @@ static inline void mm_pasid_init(struct mm_struct *mm) { mm->pasid = INVALID_IOASID; } + +/* Associate a PASID with an mm_struct: */ +static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) +{ + mm->pasid = pasid; +} + +static inline void mm_pasid_drop(struct mm_struct *mm) +{ + if (pasid_valid(mm->pasid)) { + ioasid_free(mm->pasid); + mm->pasid = INVALID_IOASID; + } +} #else static inline void mm_pasid_init(struct mm_struct *mm) {} +static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) {} +static inline void mm_pasid_drop(struct mm_struct *mm) {} #endif #endif /* _LINUX_SCHED_MM_H */ diff --git a/kernel/fork.c b/kernel/fork.c index deacd2c17a7f1..c03c6682464c1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1115,6 +1115,7 @@ static inline void __mmput(struct mm_struct *mm) } if (mm->binfmt) module_put(mm->binfmt->module); + mm_pasid_drop(mm); mmdrop(mm); } -- GitLab From dc7507ddce593cbd9c93330024a5658db6f8ef73 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:49 -0800 Subject: [PATCH 0395/1586] x86/fpu: Clear PASID when copying fpstate The kernel must allocate a Process Address Space ID (PASID) on behalf of each process which will use ENQCMD and program it into the new MSR to communicate the process identity to platform hardware. ENQCMD uses the PASID stored in this MSR to tag requests from this process. The PASID state must be cleared on fork() since fork creates a new address space. 
For clone(), it would be functionally OK to copy the PASID. However, clearing it is _also_ functionally OK since any PASID use will trigger the #GP handler to populate the MSR. Copying the PASID state has two main downsides: * It requires differentiating fork() and clone() in the code, both in the FPU code and keeping tsk->pasid_activated consistent. * It guarantees that the PASID is out of its init state, which incurs small but non-zero cost on every XSAVE/XRSTOR. The main downside of clearing the PASID at fpstate copy is the future, one-time #GP for the thread. Use the simplest approach: clear the PASID state both on clone() and fork(). Rely on the #GP handler for MSR population in children. Also, just clear the PASID bit from xfeatures if XSAVE is supported. This will have no effect on systems that do not have PASID support. It is virtually zero overhead because 'dst_fpu' was just written and the whole thing is cache hot. Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220207230254.3342514-7-fenghua.yu@intel.com --- arch/x86/kernel/fpu/core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 8dea01ffc5c18..19821f027cb39 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -612,6 +612,13 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags) fpu_inherit_perms(dst_fpu); fpregs_unlock(); + /* + * Children never inherit PASID state. 
+ * Force it to have its init value: + */ + if (use_xsave()) + dst_fpu->fpstate->regs.xsave.header.xfeatures &= ~XFEATURE_MASK_PASID; + trace_x86_fpu_copy_src(src_fpu); trace_x86_fpu_copy_dst(dst_fpu); -- GitLab From a3d29e8291b622780eb6e4e3eeaf2b24ec78fd43 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 7 Feb 2022 15:02:50 -0800 Subject: [PATCH 0396/1586] sched: Define and initialize a flag to identify valid PASID in the task Add a new single bit field to the task structure to track whether this task has initialized the IA32_PASID MSR to the mm's PASID. Initialize the field to zero when creating a new task with fork/clone. Signed-off-by: Peter Zijlstra Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220207230254.3342514-8-fenghua.yu@intel.com --- include/linux/sched.h | 3 +++ kernel/fork.c | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/include/linux/sched.h b/include/linux/sched.h index 75ba8aa60248b..4e5de3aed410d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -938,6 +938,9 @@ struct task_struct { /* Recursion prevention for eventfd_signal() */ unsigned in_eventfd_signal:1; #endif +#ifdef CONFIG_IOMMU_SVA + unsigned pasid_activated:1; +#endif unsigned long atomic_flags; /* Flags requiring atomic access. 
*/ diff --git a/kernel/fork.c b/kernel/fork.c index c03c6682464c1..51fd1df994b78 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -968,6 +968,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) tsk->use_memdelay = 0; #endif +#ifdef CONFIG_IOMMU_SVA + tsk->pasid_activated = 0; +#endif + #ifdef CONFIG_MEMCG tsk->active_memcg = NULL; #endif -- GitLab From fa6af69f38d3f409bedc55d0112eec36ed526d4b Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:51 -0800 Subject: [PATCH 0397/1586] x86/traps: Demand-populate PASID MSR via #GP All tasks start with PASID state disabled. This means that the first time they execute an ENQCMD instruction they will take a #GP fault. Modify the #GP fault handler to check if the "mm" for the task has already been allocated a PASID. If so, try to fix the #GP fault by loading the IA32_PASID MSR. Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220207230254.3342514-9-fenghua.yu@intel.com --- arch/x86/kernel/traps.c | 55 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index c9d566dcf89a0..7ef00dee35be4 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -559,6 +560,57 @@ static bool fixup_iopl_exception(struct pt_regs *regs) return true; } +/* + * The unprivileged ENQCMD instruction generates #GPs if the + * IA32_PASID MSR has not been populated. If possible, populate + * the MSR from a PASID previously allocated to the mm. + */ +static bool try_fixup_enqcmd_gp(void) +{ +#ifdef CONFIG_IOMMU_SVA + u32 pasid; + + /* + * MSR_IA32_PASID is managed using XSAVE. Directly + * writing to the MSR is only possible when fpregs + * are valid and the fpstate is not. 
This is + * guaranteed when handling a userspace exception + * in *before* interrupts are re-enabled. + */ + lockdep_assert_irqs_disabled(); + + /* + * Hardware without ENQCMD will not generate + * #GPs that can be fixed up here. + */ + if (!cpu_feature_enabled(X86_FEATURE_ENQCMD)) + return false; + + pasid = current->mm->pasid; + + /* + * If the mm has not been allocated a + * PASID, the #GP can not be fixed up. + */ + if (!pasid_valid(pasid)) + return false; + + /* + * Did this thread already have its PASID activated? + * If so, the #GP must be from something else. + */ + if (current->pasid_activated) + return false; + + wrmsrl(MSR_IA32_PASID, pasid | MSR_IA32_PASID_VALID); + current->pasid_activated = 1; + + return true; +#else + return false; +#endif +} + DEFINE_IDTENTRY_ERRORCODE(exc_general_protection) { char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR; @@ -567,6 +619,9 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection) unsigned long gp_addr; int ret; + if (user_mode(regs) && try_fixup_enqcmd_gp()) + return; + cond_local_irq_enable(regs); if (static_cpu_has(X86_FEATURE_UMIP)) { -- GitLab From 7c1ef59145f1c8bf9a2cc7a6ebf2fd56bbb440de Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:52 -0800 Subject: [PATCH 0398/1586] x86/cpufeatures: Re-enable ENQCMD The ENQCMD feature can only be used if CONFIG_INTEL_IOMMU_SVM is set. Add X86_FEATURE_ENQCMD to the disabled features mask as appropriate so that cpu_feature_enabled() can be used to check the feature. [ bp: Massage commit message. 
] Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220207230254.3342514-10-fenghua.yu@intel.com --- arch/x86/include/asm/disabled-features.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 8f28fafa98b32..1231d63f836d8 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -56,8 +56,11 @@ # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) #endif -/* Force disable because it's broken beyond repair */ -#define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) +#ifdef CONFIG_INTEL_IOMMU_SVM +# define DISABLE_ENQCMD 0 +#else +# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) +#endif #ifdef CONFIG_X86_SGX # define DISABLE_SGX 0 -- GitLab From 6e3133d901e89a4ba83ce7ebd8c27bbeaa9ed1f2 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:53 -0800 Subject: [PATCH 0399/1586] tools/objtool: Check for use of the ENQCMD instruction in the kernel The ENQCMD instruction implicitly accesses the PASID_MSR to fill in the pasid field of the descriptor being submitted to an accelerator. But there is no precise (and stable across kernel changes) point at which the PASID_MSR is updated from the value for one task to the next. Kernel code that uses accelerators must always use the ENQCMDS instruction which does not access the PASID_MSR. Check for use of the ENQCMD instruction in the kernel and warn on its usage. 
Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220207230254.3342514-11-fenghua.yu@intel.com --- tools/objtool/arch/x86/decode.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index c10ef78df050a..479e769ca3241 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -112,7 +112,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec const struct elf *elf = file->elf; struct insn insn; int x86_64, ret; - unsigned char op1, op2, + unsigned char op1, op2, op3, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0, sib = 0, /* sib_scale = 0, */ sib_index = 0, sib_base = 0; @@ -139,6 +139,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec op1 = insn.opcode.bytes[0]; op2 = insn.opcode.bytes[1]; + op3 = insn.opcode.bytes[2]; if (insn.rex_prefix.nbytes) { rex = insn.rex_prefix.bytes[0]; @@ -491,6 +492,14 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec /* nopl/nopw */ *type = INSN_NOP; + } else if (op2 == 0x38 && op3 == 0xf8) { + if (insn.prefixes.nbytes == 1 && + insn.prefixes.bytes[0] == 0xf2) { + /* ENQCMD cannot be used in the kernel. */ + WARN("ENQCMD instruction at %s:%lx", sec->name, + offset); + } + } else if (op2 == 0xa0 || op2 == 0xa8) { /* push fs/gs */ -- GitLab From 83aa52ffed5d35a08e24452d0471e1684075cdf8 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:54 -0800 Subject: [PATCH 0400/1586] Documentation/x86: Update documentation for SVA (Shared Virtual Addressing) Adjust the documentation to the new way how a PASID is being allocated, freed and fixed up. 
Based on a patch by Ashok Raj [ bp: Massage commit message, fix htmldocs build warning ] Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Acked-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220207230254.3342514-12-fenghua.yu@intel.com --- Documentation/x86/sva.rst | 53 ++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/Documentation/x86/sva.rst b/Documentation/x86/sva.rst index 076efd51ef1fe..2e9b8b0f9a0f4 100644 --- a/Documentation/x86/sva.rst +++ b/Documentation/x86/sva.rst @@ -104,18 +104,47 @@ The MSR must be configured on each logical CPU before any application thread can interact with a device. Threads that belong to the same process share the same page tables, thus the same MSR value. -PASID is cleared when a process is created. The PASID allocation and MSR -programming may occur long after a process and its threads have been created. -One thread must call iommu_sva_bind_device() to allocate the PASID for the -process. If a thread uses ENQCMD without the MSR first being populated, a #GP -will be raised. The kernel will update the PASID MSR with the PASID for all -threads in the process. A single process PASID can be used simultaneously -with multiple devices since they all share the same address space. - -One thread can call iommu_sva_unbind_device() to free the allocated PASID. -The kernel will clear the PASID MSR for all threads belonging to the process. - -New threads inherit the MSR value from the parent. +PASID Life Cycle Management +=========================== + +PASID is initialized as INVALID_IOASID (-1) when a process is created. + +Only processes that access SVA-capable devices need to have a PASID +allocated. This allocation happens when a process opens/binds an SVA-capable +device but finds no PASID for this process. Subsequent binds of the same, or +other devices will share the same PASID. 
+ +Although the PASID is allocated to the process by opening a device, +it is not active in any of the threads of that process. It's loaded to the +IA32_PASID MSR lazily when a thread tries to submit a work descriptor +to a device using the ENQCMD. + +That first access will trigger a #GP fault because the IA32_PASID MSR +has not been initialized with the PASID value assigned to the process +when the device was opened. The Linux #GP handler notes that a PASID has +been allocated for the process, and so initializes the IA32_PASID MSR +and returns so that the ENQCMD instruction is re-executed. + +On fork(2) or exec(2) the PASID is removed from the process as it no +longer has the same address space that it had when the device was opened. + +On clone(2) the new task shares the same address space, so will be +able to use the PASID allocated to the process. The IA32_PASID is not +preemptively initialized as the PASID value might not be allocated yet or +the kernel does not know whether this thread is going to access the device +and the cleared IA32_PASID MSR reduces context switch overhead by xstate +init optimization. Since #GP faults have to be handled on any threads that +were created before the PASID was assigned to the mm of the process, newly +created threads might as well be treated in a consistent way. + +Due to complexity of freeing the PASID and clearing all IA32_PASID MSRs in +all threads in unbind, free the PASID lazily only on mm exit. + +If a process does a close(2) of the device file descriptor and munmap(2) +of the device MMIO portal, then the driver will unbind the device. The +PASID is still marked VALID in the PASID_MSR for any threads in the +process that accessed the device. But this is harmless as without the +MMIO portal they cannot submit new work to the device. 
Relationships ============= -- GitLab From 45ec846c1cd11835a29c85645065115dd791aa45 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 9 Feb 2022 16:25:58 +0000 Subject: [PATCH 0401/1586] irqdomain: Let irq_domain_set_{info,hwirq_and_chip} take a const irq_chip In order to let a const irqchip be fed to the irqchip layer, adjust the various prototypes. An extra cast in irq_domain_set_hwirq_and_chip() is required to avoid a warning. Signed-off-by: Marc Zyngier Acked-by: Linus Walleij Link: https://lore.kernel.org/r/20220209162607.1118325-2-maz@kernel.org --- include/linux/irqdomain.h | 5 +++-- kernel/irq/irqdomain.c | 9 +++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index be25a33293e57..00d577f908833 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -479,7 +479,8 @@ int irq_destroy_ipi(unsigned int irq, const struct cpumask *dest); extern struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain, unsigned int virq); extern void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, - irq_hw_number_t hwirq, struct irq_chip *chip, + irq_hw_number_t hwirq, + const struct irq_chip *chip, void *chip_data, irq_flow_handler_t handler, void *handler_data, const char *handler_name); extern void irq_domain_reset_irq_data(struct irq_data *irq_data); @@ -522,7 +523,7 @@ extern int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain, extern int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq, - struct irq_chip *chip, + const struct irq_chip *chip, void *chip_data); extern void irq_domain_free_irqs_common(struct irq_domain *domain, unsigned int virq, diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index bf38c546aa256..d5ce965105493 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -1319,7 +1319,8 @@ EXPORT_SYMBOL_GPL(irq_domain_get_irq_data); * @chip_data: The associated 
chip data */ int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq, - irq_hw_number_t hwirq, struct irq_chip *chip, + irq_hw_number_t hwirq, + const struct irq_chip *chip, void *chip_data) { struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq); @@ -1328,7 +1329,7 @@ int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq, return -ENOENT; irq_data->hwirq = hwirq; - irq_data->chip = chip ? chip : &no_irq_chip; + irq_data->chip = (struct irq_chip *)(chip ? chip : &no_irq_chip); irq_data->chip_data = chip_data; return 0; @@ -1347,7 +1348,7 @@ EXPORT_SYMBOL_GPL(irq_domain_set_hwirq_and_chip); * @handler_name: The interrupt handler name */ void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, - irq_hw_number_t hwirq, struct irq_chip *chip, + irq_hw_number_t hwirq, const struct irq_chip *chip, void *chip_data, irq_flow_handler_t handler, void *handler_data, const char *handler_name) { @@ -1853,7 +1854,7 @@ EXPORT_SYMBOL_GPL(irq_domain_get_irq_data); * @handler_name: The interrupt handler name */ void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, - irq_hw_number_t hwirq, struct irq_chip *chip, + irq_hw_number_t hwirq, const struct irq_chip *chip, void *chip_data, irq_flow_handler_t handler, void *handler_data, const char *handler_name) { -- GitLab From 393e1280f765661cf39785e967676a4e57324126 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 9 Feb 2022 16:25:59 +0000 Subject: [PATCH 0402/1586] genirq: Allow irq_chip registration functions to take a const irq_chip In order to let a const irqchip be fed to the irqchip layer, adjust the various prototypes. An extra cast in irq_set_chip() is required to avoid a warning.
Signed-off-by: Marc Zyngier Acked-by: Linus Walleij Link: https://lore.kernel.org/r/20220209162607.1118325-3-maz@kernel.org --- include/linux/irq.h | 7 ++++--- kernel/irq/chip.c | 9 +++------ 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/include/linux/irq.h b/include/linux/irq.h index 2cb2e2ac2703c..f92788ccdba27 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -710,10 +710,11 @@ extern struct irq_chip no_irq_chip; extern struct irq_chip dummy_irq_chip; extern void -irq_set_chip_and_handler_name(unsigned int irq, struct irq_chip *chip, +irq_set_chip_and_handler_name(unsigned int irq, const struct irq_chip *chip, irq_flow_handler_t handle, const char *name); -static inline void irq_set_chip_and_handler(unsigned int irq, struct irq_chip *chip, +static inline void irq_set_chip_and_handler(unsigned int irq, + const struct irq_chip *chip, irq_flow_handler_t handle) { irq_set_chip_and_handler_name(irq, chip, handle, NULL); @@ -803,7 +804,7 @@ static inline void irq_set_percpu_devid_flags(unsigned int irq) } /* Set/get chip/data for an IRQ: */ -extern int irq_set_chip(unsigned int irq, struct irq_chip *chip); +extern int irq_set_chip(unsigned int irq, const struct irq_chip *chip); extern int irq_set_handler_data(unsigned int irq, void *data); extern int irq_set_chip_data(unsigned int irq, void *data); extern int irq_set_irq_type(unsigned int irq, unsigned int type); diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 24b6f2b40e5e9..54af0deb239b8 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -38,7 +38,7 @@ struct irqaction chained_action = { * @irq: irq number * @chip: pointer to irq chip description structure */ -int irq_set_chip(unsigned int irq, struct irq_chip *chip) +int irq_set_chip(unsigned int irq, const struct irq_chip *chip) { unsigned long flags; struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); @@ -46,10 +46,7 @@ int irq_set_chip(unsigned int irq, struct irq_chip *chip) if (!desc) return -EINVAL; - if 
(!chip) - chip = &no_irq_chip; - - desc->irq_data.chip = chip; + desc->irq_data.chip = (struct irq_chip *)(chip ?: &no_irq_chip); irq_put_desc_unlock(desc, flags); /* * For !CONFIG_SPARSE_IRQ make the irq show up in @@ -1073,7 +1070,7 @@ irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle, EXPORT_SYMBOL_GPL(irq_set_chained_handler_and_data); void -irq_set_chip_and_handler_name(unsigned int irq, struct irq_chip *chip, +irq_set_chip_and_handler_name(unsigned int irq, const struct irq_chip *chip, irq_flow_handler_t handle, const char *name) { irq_set_chip(irq, chip); -- GitLab From 0a25cb5544f4f01d2e7c06164555fd9cd6eb64fd Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 10 Feb 2022 09:27:21 +0000 Subject: [PATCH 0403/1586] genirq/debugfs: Use irq_print_chip() when provided by irqchip Since irqchips have the option to implement irq_print_chip, use this when available to output the irqchip name in debugfs. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220215112154.1360040-1-maz@kernel.org --- kernel/irq/debugfs.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c index e4cff358b437e..2b43f5f5033d1 100644 --- a/kernel/irq/debugfs.c +++ b/kernel/irq/debugfs.c @@ -69,8 +69,12 @@ irq_debug_show_chip(struct seq_file *m, struct irq_data *data, int ind) seq_printf(m, "chip: None\n"); return; } - seq_printf(m, "%*schip: %s\n", ind, "", chip->name); - seq_printf(m, "%*sflags: 0x%lx\n", ind + 1, "", chip->flags); + seq_printf(m, "%*schip: ", ind, ""); + if (chip->irq_print_chip) + chip->irq_print_chip(data, m); + else + seq_printf(m, "%s", chip->name); + seq_printf(m, "\n%*sflags: 0x%lx\n", ind + 1, "", chip->flags); irq_debug_show_bits(m, ind, chip->flags, irqchip_flags, ARRAY_SIZE(irqchip_flags)); } -- GitLab From 745f1fb91fe51b4a36bc1c3dbccdbbbb978f93d2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 9 Feb 2022 16:26:00 +0000 Subject: [PATCH 0404/1586] 
irqchip/gic: Switch to dynamic chip name output The last dynamic aspect of the GIC's irq_chip structure is the name that is associated to it. Move the output of that name to the relevant callback, which allows us to do a bit of cleanup and mark the structures const. Signed-off-by: Marc Zyngier Reviewed-by: Linus Walleij Acked-by: Linus Walleij Link: https://lore.kernel.org/r/20220209162607.1118325-4-maz@kernel.org --- drivers/irqchip/irq-gic.c | 102 ++++++++++++++++++++------------------ 1 file changed, 54 insertions(+), 48 deletions(-) diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index fb741b42ca2d2..58ba835bee1f3 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -66,7 +67,6 @@ union gic_base { }; struct gic_chip_data { - struct irq_chip chip; union gic_base dist_base; union gic_base cpu_base; void __iomem *raw_dist_base; @@ -397,18 +397,15 @@ static void gic_handle_cascade_irq(struct irq_desc *desc) chained_irq_exit(chip, desc); } -static const struct irq_chip gic_chip = { - .irq_mask = gic_mask_irq, - .irq_unmask = gic_unmask_irq, - .irq_eoi = gic_eoi_irq, - .irq_set_type = gic_set_type, - .irq_retrigger = gic_retrigger, - .irq_get_irqchip_state = gic_irq_get_irqchip_state, - .irq_set_irqchip_state = gic_irq_set_irqchip_state, - .flags = IRQCHIP_SET_TYPE_MASKED | - IRQCHIP_SKIP_SET_WAKE | - IRQCHIP_MASK_ON_SUSPEND, -}; +static void gic_irq_print_chip(struct irq_data *d, struct seq_file *p) +{ + struct gic_chip_data *gic = irq_data_get_irq_chip_data(d); + + if (gic->domain->dev) + seq_printf(p, gic->domain->dev->of_node->name); + else + seq_printf(p, "GIC-%d", (int)(gic - &gic_data[0])); +} void __init gic_cascade_irq(unsigned int gic_nr, unsigned int irq) { @@ -799,8 +796,12 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, bool force) { void __iomem *reg = gic_dist_base(d) + GIC_DIST_TARGET + 
gic_irq(d); + struct gic_chip_data *gic = irq_data_get_irq_chip_data(d); unsigned int cpu; + if (unlikely(gic != &gic_data[0])) + return -EINVAL; + if (!force) cpu = cpumask_any_and(mask_val, cpu_online_mask); else @@ -880,6 +881,39 @@ static __init void gic_smp_init(void) #define gic_ipi_send_mask NULL #endif +static const struct irq_chip gic_chip = { + .irq_mask = gic_mask_irq, + .irq_unmask = gic_unmask_irq, + .irq_eoi = gic_eoi_irq, + .irq_set_type = gic_set_type, + .irq_retrigger = gic_retrigger, + .irq_set_affinity = gic_set_affinity, + .ipi_send_mask = gic_ipi_send_mask, + .irq_get_irqchip_state = gic_irq_get_irqchip_state, + .irq_set_irqchip_state = gic_irq_set_irqchip_state, + .irq_print_chip = gic_irq_print_chip, + .flags = IRQCHIP_SET_TYPE_MASKED | + IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_MASK_ON_SUSPEND, +}; + +static const struct irq_chip gic_chip_mode1 = { + .name = "GICv2", + .irq_mask = gic_eoimode1_mask_irq, + .irq_unmask = gic_unmask_irq, + .irq_eoi = gic_eoimode1_eoi_irq, + .irq_set_type = gic_set_type, + .irq_retrigger = gic_retrigger, + .irq_set_affinity = gic_set_affinity, + .ipi_send_mask = gic_ipi_send_mask, + .irq_get_irqchip_state = gic_irq_get_irqchip_state, + .irq_set_irqchip_state = gic_irq_set_irqchip_state, + .irq_set_vcpu_affinity = gic_irq_set_vcpu_affinity, + .flags = IRQCHIP_SET_TYPE_MASKED | + IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_MASK_ON_SUSPEND, +}; + #ifdef CONFIG_BL_SWITCHER /* * gic_send_sgi - send a SGI directly to given CPU interface number @@ -1024,15 +1058,19 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq, { struct gic_chip_data *gic = d->host_data; struct irq_data *irqd = irq_desc_get_irq_data(irq_to_desc(irq)); + const struct irq_chip *chip; + + chip = (static_branch_likely(&supports_deactivate_key) && + gic == &gic_data[0]) ? &gic_chip_mode1 : &gic_chip; switch (hw) { case 0 ... 
31: irq_set_percpu_devid(irq); - irq_domain_set_info(d, irq, hw, &gic->chip, d->host_data, + irq_domain_set_info(d, irq, hw, chip, d->host_data, handle_percpu_devid_irq, NULL, NULL); break; default: - irq_domain_set_info(d, irq, hw, &gic->chip, d->host_data, + irq_domain_set_info(d, irq, hw, chip, d->host_data, handle_fasteoi_irq, NULL, NULL); irq_set_probe(irq); irqd_set_single_target(irqd); @@ -1127,25 +1165,6 @@ static const struct irq_domain_ops gic_irq_domain_ops = { .unmap = gic_irq_domain_unmap, }; -static void gic_init_chip(struct gic_chip_data *gic, const char *name, - bool use_eoimode1) -{ - /* Initialize irq_chip */ - gic->chip = gic_chip; - gic->chip.name = name; - - if (use_eoimode1) { - gic->chip.irq_mask = gic_eoimode1_mask_irq; - gic->chip.irq_eoi = gic_eoimode1_eoi_irq; - gic->chip.irq_set_vcpu_affinity = gic_irq_set_vcpu_affinity; - } - - if (gic == &gic_data[0]) { - gic->chip.irq_set_affinity = gic_set_affinity; - gic->chip.ipi_send_mask = gic_ipi_send_mask; - } -} - static int gic_init_bases(struct gic_chip_data *gic, struct fwnode_handle *handle) { @@ -1245,7 +1264,6 @@ error: static int __init __gic_init_bases(struct gic_chip_data *gic, struct fwnode_handle *handle) { - char *name; int i, ret; if (WARN_ON(!gic || gic->domain)) @@ -1265,18 +1283,8 @@ static int __init __gic_init_bases(struct gic_chip_data *gic, pr_info("GIC: Using split EOI/Deactivate mode\n"); } - if (static_branch_likely(&supports_deactivate_key) && gic == &gic_data[0]) { - name = kasprintf(GFP_KERNEL, "GICv2"); - gic_init_chip(gic, name, true); - } else { - name = kasprintf(GFP_KERNEL, "GIC-%d", (int)(gic-&gic_data[0])); - gic_init_chip(gic, name, false); - } - ret = gic_init_bases(gic, handle); - if (ret) - kfree(name); - else if (gic == &gic_data[0]) + if (gic == &gic_data[0]) gic_smp_init(); return ret; @@ -1459,8 +1467,6 @@ int gic_of_init_child(struct device *dev, struct gic_chip_data **gic, int irq) if (!*gic) return -ENOMEM; - gic_init_chip(*gic, dev->of_node->name, 
false); - ret = gic_of_setup(*gic, dev->of_node); if (ret) return ret; -- GitLab From 365550239f87342e3d29802fce0f5e9a044e8070 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 9 Feb 2022 16:26:01 +0000 Subject: [PATCH 0405/1586] irqchip/lpc32xx: Switch to dynamic chip name output Instead of overriding the name field with the device name, use the relevant callback. This allows us to make the irq_chip structure const. Signed-off-by: Marc Zyngier Acked-by: Linus Walleij Link: https://lore.kernel.org/r/20220209162607.1118325-5-maz@kernel.org --- drivers/irqchip/irq-lpc32xx.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/irqchip/irq-lpc32xx.c b/drivers/irqchip/irq-lpc32xx.c index a29357f394506..4d70a857133f4 100644 --- a/drivers/irqchip/irq-lpc32xx.c +++ b/drivers/irqchip/irq-lpc32xx.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -25,8 +26,8 @@ struct lpc32xx_irq_chip { void __iomem *base; + phys_addr_t addr; struct irq_domain *domain; - struct irq_chip chip; }; static struct lpc32xx_irq_chip *lpc32xx_mic_irqc; @@ -118,6 +119,24 @@ static int lpc32xx_irq_set_type(struct irq_data *d, unsigned int type) return 0; } +static void lpc32xx_irq_print_chip(struct irq_data *d, struct seq_file *p) +{ + struct lpc32xx_irq_chip *ic = irq_data_get_irq_chip_data(d); + + if (ic == lpc32xx_mic_irqc) + seq_printf(p, "%08x.mic", ic->addr); + else + seq_printf(p, "%08x.sic", ic->addr); +} + +static const struct irq_chip lpc32xx_chip = { + .irq_ack = lpc32xx_irq_ack, + .irq_mask = lpc32xx_irq_mask, + .irq_unmask = lpc32xx_irq_unmask, + .irq_set_type = lpc32xx_irq_set_type, + .irq_print_chip = lpc32xx_irq_print_chip, +}; + static void __exception_irq_entry lpc32xx_handle_irq(struct pt_regs *regs) { struct lpc32xx_irq_chip *ic = lpc32xx_mic_irqc; @@ -153,7 +172,7 @@ static int lpc32xx_irq_domain_map(struct irq_domain *id, unsigned int virq, struct lpc32xx_irq_chip *ic = id->host_data; 
irq_set_chip_data(virq, ic); - irq_set_chip_and_handler(virq, &ic->chip, handle_level_irq); + irq_set_chip_and_handler(virq, &lpc32xx_chip, handle_level_irq); irq_set_status_flags(virq, IRQ_LEVEL); irq_set_noprobe(virq); @@ -183,6 +202,7 @@ static int __init lpc32xx_of_ic_init(struct device_node *node, if (!irqc) return -ENOMEM; + irqc->addr = addr; irqc->base = of_iomap(node, 0); if (!irqc->base) { pr_err("%pOF: unable to map registers\n", node); @@ -190,21 +210,11 @@ static int __init lpc32xx_of_ic_init(struct device_node *node, return -EINVAL; } - irqc->chip.irq_ack = lpc32xx_irq_ack; - irqc->chip.irq_mask = lpc32xx_irq_mask; - irqc->chip.irq_unmask = lpc32xx_irq_unmask; - irqc->chip.irq_set_type = lpc32xx_irq_set_type; - if (is_mic) - irqc->chip.name = kasprintf(GFP_KERNEL, "%08x.mic", addr); - else - irqc->chip.name = kasprintf(GFP_KERNEL, "%08x.sic", addr); - irqc->domain = irq_domain_add_linear(node, NR_LPC32XX_IC_IRQS, &lpc32xx_irq_domain_ops, irqc); if (!irqc->domain) { pr_err("unable to add irq domain\n"); iounmap(irqc->base); - kfree(irqc->chip.name); kfree(irqc); return -ENODEV; } -- GitLab From 421f16238a9da88ce1f2e3a103866de095cc15f6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 9 Feb 2022 16:26:02 +0000 Subject: [PATCH 0406/1586] irqchip/mvebu-pic: Switch to dynamic chip name output Instead of overriding the name field, track the corresponding device and use the relevant callback to output its name. This allows us to make the irq_chip structure const. 
Signed-off-by: Marc Zyngier Reviewed-by: Gregory CLEMENT Acked-by: Linus Walleij Link: https://lore.kernel.org/r/20220209162607.1118325-6-maz@kernel.org --- drivers/irqchip/irq-mvebu-pic.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/irqchip/irq-mvebu-pic.c b/drivers/irqchip/irq-mvebu-pic.c index 870f9866b8da4..ef3d3646ccc2f 100644 --- a/drivers/irqchip/irq-mvebu-pic.c +++ b/drivers/irqchip/irq-mvebu-pic.c @@ -18,6 +18,7 @@ #include #include #include +#include #define PIC_CAUSE 0x0 #define PIC_MASK 0x4 @@ -29,7 +30,7 @@ struct mvebu_pic { void __iomem *base; u32 parent_irq; struct irq_domain *domain; - struct irq_chip irq_chip; + struct platform_device *pdev; }; static void mvebu_pic_reset(struct mvebu_pic *pic) @@ -66,6 +67,20 @@ static void mvebu_pic_unmask_irq(struct irq_data *d) writel(reg, pic->base + PIC_MASK); } +static void mvebu_pic_print_chip(struct irq_data *d, struct seq_file *p) +{ + struct mvebu_pic *pic = irq_data_get_irq_chip_data(d); + + seq_printf(p, dev_name(&pic->pdev->dev)); +} + +static const struct irq_chip mvebu_pic_chip = { + .irq_mask = mvebu_pic_mask_irq, + .irq_unmask = mvebu_pic_unmask_irq, + .irq_eoi = mvebu_pic_eoi_irq, + .irq_print_chip = mvebu_pic_print_chip, +}; + static int mvebu_pic_irq_map(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq) { @@ -73,8 +88,7 @@ static int mvebu_pic_irq_map(struct irq_domain *domain, unsigned int virq, irq_set_percpu_devid(virq); irq_set_chip_data(virq, pic); - irq_set_chip_and_handler(virq, &pic->irq_chip, - handle_percpu_devid_irq); + irq_set_chip_and_handler(virq, &mvebu_pic_chip, handle_percpu_devid_irq); irq_set_status_flags(virq, IRQ_LEVEL); irq_set_probe(virq); @@ -120,22 +134,16 @@ static int mvebu_pic_probe(struct platform_device *pdev) { struct device_node *node = pdev->dev.of_node; struct mvebu_pic *pic; - struct irq_chip *irq_chip; pic = devm_kzalloc(&pdev->dev, sizeof(struct mvebu_pic), GFP_KERNEL); if (!pic) 
return -ENOMEM; + pic->pdev = pdev; pic->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(pic->base)) return PTR_ERR(pic->base); - irq_chip = &pic->irq_chip; - irq_chip->name = dev_name(&pdev->dev); - irq_chip->irq_mask = mvebu_pic_mask_irq; - irq_chip->irq_unmask = mvebu_pic_unmask_irq; - irq_chip->irq_eoi = mvebu_pic_eoi_irq; - pic->parent_irq = irq_of_parse_and_map(node, 0); if (pic->parent_irq <= 0) { dev_err(&pdev->dev, "Failed to parse parent interrupt\n"); -- GitLab From 3344265a2692414831c15964dd27a5b4adc4ed83 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 9 Feb 2022 16:26:03 +0000 Subject: [PATCH 0407/1586] irqchip/ts4800: Switch to dynamic chip name output Signed-off-by: Marc Zyngier Acked-by: Linus Walleij Link: https://lore.kernel.org/r/20220209162607.1118325-7-maz@kernel.org --- drivers/irqchip/irq-ts4800.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/irqchip/irq-ts4800.c b/drivers/irqchip/irq-ts4800.c index f032db23b30f5..b2d61d4f6fe6f 100644 --- a/drivers/irqchip/irq-ts4800.c +++ b/drivers/irqchip/irq-ts4800.c @@ -19,14 +19,15 @@ #include #include #include +#include #define IRQ_MASK 0x4 #define IRQ_STATUS 0x8 struct ts4800_irq_data { void __iomem *base; + struct platform_device *pdev; struct irq_domain *domain; - struct irq_chip irq_chip; }; static void ts4800_irq_mask(struct irq_data *d) @@ -47,12 +48,25 @@ static void ts4800_irq_unmask(struct irq_data *d) writew(reg & ~mask, data->base + IRQ_MASK); } +static void ts4800_irq_print_chip(struct irq_data *d, struct seq_file *p) +{ + struct ts4800_irq_data *data = irq_data_get_irq_chip_data(d); + + seq_printf(p, "%s", dev_name(&data->pdev->dev)); +} + +static const struct irq_chip ts4800_chip = { + .irq_mask = ts4800_irq_mask, + .irq_unmask = ts4800_irq_unmask, + .irq_print_chip = ts4800_irq_print_chip, +}; + static int ts4800_irqdomain_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hwirq) { struct ts4800_irq_data 
*data = d->host_data; - irq_set_chip_and_handler(irq, &data->irq_chip, handle_simple_irq); + irq_set_chip_and_handler(irq, &ts4800_chip, handle_simple_irq); irq_set_chip_data(irq, data); irq_set_noprobe(irq); @@ -92,13 +106,13 @@ static int ts4800_ic_probe(struct platform_device *pdev) { struct device_node *node = pdev->dev.of_node; struct ts4800_irq_data *data; - struct irq_chip *irq_chip; int parent_irq; data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; + data->pdev = pdev; data->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(data->base)) return PTR_ERR(data->base); @@ -111,11 +125,6 @@ static int ts4800_ic_probe(struct platform_device *pdev) return -EINVAL; } - irq_chip = &data->irq_chip; - irq_chip->name = dev_name(&pdev->dev); - irq_chip->irq_mask = ts4800_irq_mask; - irq_chip->irq_unmask = ts4800_irq_unmask; - data->domain = irq_domain_add_linear(node, 8, &ts4800_ic_ops, data); if (!data->domain) { dev_err(&pdev->dev, "cannot add IRQ domain\n"); -- GitLab From 3fb212a042fbd8eccbb2af1852e03ed7757b9600 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 9 Feb 2022 16:26:04 +0000 Subject: [PATCH 0408/1586] irqchip/versatile-fpga: Switch to dynamic chip name output Move the name output to the relevant callback, which allows us some nice cleanups (mostly owing to the fact that the driver is now DT only).
Signed-off-by: Marc Zyngier Reviewed-by: Linus Walleij Acked-by: Linus Walleij Link: https://lore.kernel.org/r/20220209162607.1118325-8-maz@kernel.org --- drivers/irqchip/irq-ftintc010.c | 1 - drivers/irqchip/irq-versatile-fpga.c | 46 +++++++++++++++----------- include/linux/irqchip/versatile-fpga.h | 14 -------- 3 files changed, 26 insertions(+), 35 deletions(-) delete mode 100644 include/linux/irqchip/versatile-fpga.h diff --git a/drivers/irqchip/irq-ftintc010.c b/drivers/irqchip/irq-ftintc010.c index 5cc268880f8e1..46a3aa60e50e3 100644 --- a/drivers/irqchip/irq-ftintc010.c +++ b/drivers/irqchip/irq-ftintc010.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/irqchip/irq-versatile-fpga.c b/drivers/irqchip/irq-versatile-fpga.c index f2757b6aecc81..ba543ed9c1543 100644 --- a/drivers/irqchip/irq-versatile-fpga.c +++ b/drivers/irqchip/irq-versatile-fpga.c @@ -7,12 +7,12 @@ #include #include #include -#include #include #include #include #include #include +#include #include #include @@ -34,14 +34,12 @@ /** * struct fpga_irq_data - irq data container for the FPGA IRQ controller * @base: memory offset in virtual memory - * @chip: chip container for this instance * @domain: IRQ domain for this instance * @valid: mask for valid IRQs on this controller * @used_irqs: number of active IRQs on this controller */ struct fpga_irq_data { void __iomem *base; - struct irq_chip chip; u32 valid; struct irq_domain *domain; u8 used_irqs; @@ -67,6 +65,20 @@ static void fpga_irq_unmask(struct irq_data *d) writel(mask, f->base + IRQ_ENABLE_SET); } +static void fpga_irq_print_chip(struct irq_data *d, struct seq_file *p) +{ + struct fpga_irq_data *f = irq_data_get_irq_chip_data(d); + + seq_printf(p, irq_domain_get_of_node(f->domain)->name); +} + +static const struct irq_chip fpga_chip = { + .irq_ack = fpga_irq_mask, + .irq_mask = fpga_irq_mask, + .irq_unmask = fpga_irq_unmask, + .irq_print_chip = fpga_irq_print_chip, +}; + static void 
fpga_irq_handle(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); @@ -116,7 +128,7 @@ static int handle_one_fpga(struct fpga_irq_data *f, struct pt_regs *regs) * Keep iterating over all registered FPGA IRQ controllers until there are * no pending interrupts. */ -asmlinkage void __exception_irq_entry fpga_handle_irq(struct pt_regs *regs) +static asmlinkage void __exception_irq_entry fpga_handle_irq(struct pt_regs *regs) { int i, handled; @@ -135,8 +147,7 @@ static int fpga_irqdomain_map(struct irq_domain *d, unsigned int irq, if (!(f->valid & BIT(hwirq))) return -EPERM; irq_set_chip_data(irq, f); - irq_set_chip_and_handler(irq, &f->chip, - handle_level_irq); + irq_set_chip_and_handler(irq, &fpga_chip, handle_level_irq); irq_set_probe(irq); return 0; } @@ -146,8 +157,8 @@ static const struct irq_domain_ops fpga_irqdomain_ops = { .xlate = irq_domain_xlate_onetwocell, }; -void __init fpga_irq_init(void __iomem *base, const char *name, int irq_start, - int parent_irq, u32 valid, struct device_node *node) +static void __init fpga_irq_init(void __iomem *base, int parent_irq, + u32 valid, struct device_node *node) { struct fpga_irq_data *f; int i; @@ -158,10 +169,6 @@ void __init fpga_irq_init(void __iomem *base, const char *name, int irq_start, } f = &fpga_irq_devices[fpga_irq_id]; f->base = base; - f->chip.name = name; - f->chip.irq_ack = fpga_irq_mask; - f->chip.irq_mask = fpga_irq_mask; - f->chip.irq_unmask = fpga_irq_unmask; f->valid = valid; if (parent_irq != -1) { @@ -169,20 +176,19 @@ void __init fpga_irq_init(void __iomem *base, const char *name, int irq_start, f); } - /* This will also allocate irq descriptors */ - f->domain = irq_domain_add_simple(node, fls(valid), irq_start, + f->domain = irq_domain_add_linear(node, fls(valid), &fpga_irqdomain_ops, f); /* This will allocate all valid descriptors in the linear case */ for (i = 0; i < fls(valid); i++) if (valid & BIT(i)) { - if (!irq_start) - irq_create_mapping(f->domain, i); + /* Is this 
still required? */ + irq_create_mapping(f->domain, i); f->used_irqs++; } pr_info("FPGA IRQ chip %d \"%s\" @ %p, %u irqs", - fpga_irq_id, name, base, f->used_irqs); + fpga_irq_id, node->name, base, f->used_irqs); if (parent_irq != -1) pr_cont(", parent IRQ: %d\n", parent_irq); else @@ -192,8 +198,8 @@ void __init fpga_irq_init(void __iomem *base, const char *name, int irq_start, } #ifdef CONFIG_OF -int __init fpga_irq_of_init(struct device_node *node, - struct device_node *parent) +static int __init fpga_irq_of_init(struct device_node *node, + struct device_node *parent) { void __iomem *base; u32 clear_mask; @@ -222,7 +228,7 @@ int __init fpga_irq_of_init(struct device_node *node, parent_irq = -1; } - fpga_irq_init(base, node->name, 0, parent_irq, valid_mask, node); + fpga_irq_init(base, parent_irq, valid_mask, node); /* * On Versatile AB/PB, some secondary interrupts have a direct diff --git a/include/linux/irqchip/versatile-fpga.h b/include/linux/irqchip/versatile-fpga.h deleted file mode 100644 index a978fc8c79965..0000000000000 --- a/include/linux/irqchip/versatile-fpga.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef PLAT_FPGA_IRQ_H -#define PLAT_FPGA_IRQ_H - -struct device_node; -struct pt_regs; - -void fpga_handle_irq(struct pt_regs *regs); -void fpga_irq_init(void __iomem *, const char *, int, int, u32, - struct device_node *node); -int fpga_irq_of_init(struct device_node *node, - struct device_node *parent); - -#endif -- GitLab From 95ed57c73bbcffa02cbb1d61c19484c2ec6de094 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 10 Feb 2022 14:48:56 -0800 Subject: [PATCH 0409/1586] perf: replace bitmap_weight with bitmap_empty where appropriate In some places, drivers/perf code calls bitmap_weight() to check if any bit of a given bitmap is set. It's better to use bitmap_empty() in that case because bitmap_empty() stops traversing the bitmap as soon as it finds first set bit, while bitmap_weight() counts all bits unconditionally. 
Signed-off-by: Yury Norov Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20220210224933.379149-13-yury.norov@gmail.com Signed-off-by: Will Deacon --- drivers/perf/arm-cci.c | 2 +- drivers/perf/arm_pmu.c | 4 ++-- drivers/perf/hisilicon/hisi_uncore_pmu.c | 2 +- drivers/perf/xgene_pmu.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c index 54aca3a628147..96e09fa409095 100644 --- a/drivers/perf/arm-cci.c +++ b/drivers/perf/arm-cci.c @@ -1096,7 +1096,7 @@ static void cci_pmu_enable(struct pmu *pmu) { struct cci_pmu *cci_pmu = to_cci_pmu(pmu); struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events; - int enabled = bitmap_weight(hw_events->used_mask, cci_pmu->num_cntrs); + bool enabled = !bitmap_empty(hw_events->used_mask, cci_pmu->num_cntrs); unsigned long flags; if (!enabled) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 295cc7952d0ed..a31b302b0aded 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -524,7 +524,7 @@ static void armpmu_enable(struct pmu *pmu) { struct arm_pmu *armpmu = to_arm_pmu(pmu); struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); - int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events); + bool enabled = !bitmap_empty(hw_events->used_mask, armpmu->num_events); /* For task-bound events we may be called on other CPUs */ if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) @@ -785,7 +785,7 @@ static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd, { struct arm_pmu *armpmu = container_of(b, struct arm_pmu, cpu_pm_nb); struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); - int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events); + bool enabled = !bitmap_empty(hw_events->used_mask, armpmu->num_events); if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) return NOTIFY_DONE; diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c 
b/drivers/perf/hisilicon/hisi_uncore_pmu.c index a738aeab5c049..358e4e284a629 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -393,7 +393,7 @@ EXPORT_SYMBOL_GPL(hisi_uncore_pmu_read); void hisi_uncore_pmu_enable(struct pmu *pmu) { struct hisi_pmu *hisi_pmu = to_hisi_pmu(pmu); - int enabled = bitmap_weight(hisi_pmu->pmu_events.used_mask, + bool enabled = !bitmap_empty(hisi_pmu->pmu_events.used_mask, hisi_pmu->num_counters); if (!enabled) diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c index 5283608dc055b..0c32dffc7edef 100644 --- a/drivers/perf/xgene_pmu.c +++ b/drivers/perf/xgene_pmu.c @@ -867,7 +867,7 @@ static void xgene_perf_pmu_enable(struct pmu *pmu) { struct xgene_pmu_dev *pmu_dev = to_pmu_dev(pmu); struct xgene_pmu *xgene_pmu = pmu_dev->parent; - int enabled = bitmap_weight(pmu_dev->cntr_assign_mask, + bool enabled = !bitmap_empty(pmu_dev->cntr_assign_mask, pmu_dev->max_counters); if (!enabled) -- GitLab From 69591a402d18fd60c8c95b061279f17bf0b78129 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Feb 2022 09:10:47 +0100 Subject: [PATCH 0410/1586] block: remove biodoc.rst This document is completely out of date and extremely misleading. In general the existing kerneldoc comments serve as much better documentation of the still existing functionality, while the history blurbs are pretty much irrelevant today.
Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220215081047.3693582-1-hch@lst.de Signed-off-by: Jens Axboe --- Documentation/block/biodoc.rst | 1155 -------------------------------- Documentation/block/index.rst | 1 - 2 files changed, 1156 deletions(-) delete mode 100644 Documentation/block/biodoc.rst diff --git a/Documentation/block/biodoc.rst b/Documentation/block/biodoc.rst deleted file mode 100644 index fdebc0fd8468f..0000000000000 --- a/Documentation/block/biodoc.rst +++ /dev/null @@ -1,1155 +0,0 @@ -===================================================== -Notes on the Generic Block Layer Rewrite in Linux 2.5 -===================================================== - -.. note:: - - It seems that there are lot of outdated stuff here. This seems - to be written somewhat as a task list. Yet, eventually, something - here might still be useful. - -Notes Written on Jan 15, 2002: - - - Jens Axboe - - Suparna Bhattacharya - -Last Updated May 2, 2002 - -September 2003: Updated I/O Scheduler portions - - Nick Piggin - -Introduction -============ - -These are some notes describing some aspects of the 2.5 block layer in the -context of the bio rewrite. The idea is to bring out some of the key -changes and a glimpse of the rationale behind those changes. - -Please mail corrections & suggestions to suparna@in.ibm.com. - -Credits -======= - -2.5 bio rewrite: - - Jens Axboe - -Many aspects of the generic block layer redesign were driven by and evolved -over discussions, prior patches and the collective experience of several -people. See sections 8 and 9 for a list of some related references. - -The following people helped with review comments and inputs for this -document: - - - Christoph Hellwig - - Arjan van de Ven - - Randy Dunlap - - Andre Hedrick - -The following people helped with fixes/contributions to the bio patches -while it was still work-in-progress: - - - David S. Miller - - -.. Description of Contents: - - 1. 
Scope for tuning of logic to various needs - 1.1 Tuning based on device or low level driver capabilities - - Per-queue parameters - - Highmem I/O support - - I/O scheduler modularization - 1.2 Tuning based on high level requirements/capabilities - 1.2.1 Request Priority/Latency - 1.3 Direct access/bypass to lower layers for diagnostics and special - device operations - 1.3.1 Pre-built commands - 2. New flexible and generic but minimalist i/o structure or descriptor - (instead of using buffer heads at the i/o layer) - 2.1 Requirements/Goals addressed - 2.2 The bio struct in detail (multi-page io unit) - 2.3 Changes in the request structure - 3. Using bios - 3.1 Setup/teardown (allocation, splitting) - 3.2 Generic bio helper routines - 3.2.1 Traversing segments and completion units in a request - 3.2.2 Setting up DMA scatterlists - 3.2.3 I/O completion - 3.2.4 Implications for drivers that do not interpret bios (don't handle - multiple segments) - 3.3 I/O submission - 4. The I/O scheduler - 5. Scalability related changes - 5.1 Granular locking: Removal of io_request_lock - 5.2 Prepare for transition to 64 bit sector_t - 6. Other Changes/Implications - 6.1 Partition re-mapping handled by the generic block layer - 7. A few tips on migration of older drivers - 8. A list of prior/related/impacted patches/ideas - 9. Other References/Discussion Threads - - -Bio Notes -========= - -Let us discuss the changes in the context of how some overall goals for the -block layer are addressed. - -1. Scope for tuning the generic logic to satisfy various requirements -===================================================================== - -The block layer design supports adaptable abstractions to handle common -processing with the ability to tune the logic to an appropriate extent -depending on the nature of the device and the requirements of the caller. 
-One of the objectives of the rewrite was to increase the degree of tunability -and to enable higher level code to utilize underlying device/driver -capabilities to the maximum extent for better i/o performance. This is -important especially in the light of ever improving hardware capabilities -and application/middleware software designed to take advantage of these -capabilities. - -1.1 Tuning based on low level device / driver capabilities ----------------------------------------------------------- - -Sophisticated devices with large built-in caches, intelligent i/o scheduling -optimizations, high memory DMA support, etc may find some of the -generic processing an overhead, while for less capable devices the -generic functionality is essential for performance or correctness reasons. -Knowledge of some of the capabilities or parameters of the device should be -used at the generic block layer to take the right decisions on -behalf of the driver. - -How is this achieved ? - -Tuning at a per-queue level: - -i. Per-queue limits/values exported to the generic layer by the driver - -Various parameters that the generic i/o scheduler logic uses are set at -a per-queue level (e.g maximum request size, maximum number of segments in -a scatter-gather list, logical block size) - -Some parameters that were earlier available as global arrays indexed by -major/minor are now directly associated with the queue. Some of these may -move into the block device structure in the future. Some characteristics -have been incorporated into a queue flags field rather than separate fields -in themselves. There are blk_queue_xxx functions to set the parameters, -rather than update the fields directly - -Some new queue property settings: - - blk_queue_bounce_limit(q, u64 dma_address) - Enable I/O to highmem pages, dma_address being the - limit. No highmem default. - - blk_queue_max_sectors(q, max_sectors) - Sets two variables that limit the size of the request. 
- - - The request queue's max_sectors, which is a soft size in - units of 512 byte sectors, and could be dynamically varied - by the core kernel. - - - The request queue's max_hw_sectors, which is a hard limit - and reflects the maximum size request a driver can handle - in units of 512 byte sectors. - - The default for both max_sectors and max_hw_sectors is - 255. The upper limit of max_sectors is 1024. - - blk_queue_max_phys_segments(q, max_segments) - Maximum physical segments you can handle in a request. 128 - default (driver limit). (See 3.2.2) - - blk_queue_max_hw_segments(q, max_segments) - Maximum dma segments the hardware can handle in a request. 128 - default (host adapter limit, after dma remapping). - (See 3.2.2) - - blk_queue_max_segment_size(q, max_seg_size) - Maximum size of a clustered segment, 64kB default. - - blk_queue_logical_block_size(q, logical_block_size) - Lowest possible sector size that the hardware can operate - on, 512 bytes default. - -New queue flags: - - - QUEUE_FLAG_CLUSTER (see 3.2.2) - - QUEUE_FLAG_QUEUED (see 3.2.4) - - -ii. High-mem i/o capabilities are now considered the default - -The generic bounce buffer logic, present in 2.4, where the block layer would -by default copyin/out i/o requests on high-memory buffers to low-memory buffers -assuming that the driver wouldn't be able to handle it directly, has been -changed in 2.5. The bounce logic is now applied only for memory ranges -for which the device cannot handle i/o. A driver can specify this by -setting the queue bounce limit for the request queue for the device -(blk_queue_bounce_limit()). This avoids the inefficiencies of the copyin/out -where a device is capable of handling high memory i/o. 
- -In order to enable high-memory i/o where the device is capable of supporting -it, the pci dma mapping routines and associated data structures have now been -modified to accomplish a direct page -> bus translation, without requiring -a virtual address mapping (unlike the earlier scheme of virtual address --> bus translation). So this works uniformly for high-memory pages (which -do not have a corresponding kernel virtual address space mapping) and -low-memory pages. - -Note: Please refer to Documentation/core-api/dma-api-howto.rst for a discussion -on PCI high mem DMA aspects and mapping of scatter gather lists, and support -for 64 bit PCI. - -Special handling is required only for cases where i/o needs to happen on -pages at physical memory addresses beyond what the device can support. In these -cases, a bounce bio representing a buffer from the supported memory range -is used for performing the i/o with copyin/copyout as needed depending on -the type of the operation. For example, in case of a read operation, the -data read has to be copied to the original buffer on i/o completion, so a -callback routine is set up to do this, while for write, the data is copied -from the original buffer to the bounce buffer prior to issuing the -operation. Since an original buffer may be in a high memory area that's not -mapped in kernel virtual addr, a kmap operation may be required for -performing the copy, and special care may be needed in the completion path -as it may not be in irq context. Special care is also required (by way of -GFP flags) when allocating bounce buffers, to avoid certain highmem -deadlock possibilities. - -It is also possible that a bounce buffer may be allocated from high-memory -area that's not mapped in kernel virtual addr, but within the range that the -device can use directly; so the bounce page may need to be kmapped during -copy operations. 
[Note: This does not hold in the current implementation, -though] - -There are some situations when pages from high memory may need to -be kmapped, even if bounce buffers are not necessary. For example a device -may need to abort DMA operations and revert to PIO for the transfer, in -which case a virtual mapping of the page is required. For SCSI it is also -done in some scenarios where the low level driver cannot be trusted to -handle a single sg entry correctly. The driver is expected to perform the -kmaps as needed on such occasions as appropriate. A driver could also use -the blk_queue_bounce() routine on its own to bounce highmem i/o to low -memory for specific requests if so desired. - -iii. The i/o scheduler algorithm itself can be replaced/set as appropriate - -As in 2.4, it is possible to plugin a brand new i/o scheduler for a particular -queue or pick from (copy) existing generic schedulers and replace/override -certain portions of it. The 2.5 rewrite provides improved modularization -of the i/o scheduler. There are more pluggable callbacks, e.g for init, -add request, extract request, which makes it possible to abstract specific -i/o scheduling algorithm aspects and details outside of the generic loop. -It also makes it possible to completely hide the implementation details of -the i/o scheduler from block drivers. - -I/O scheduler wrappers are to be used instead of accessing the queue directly. -See section 4. The I/O scheduler for details. - -1.2 Tuning Based on High level code capabilities ------------------------------------------------- - -i. Application capabilities for raw i/o - -This comes from some of the high-performance database/middleware -requirements where an application prefers to make its own i/o scheduling -decisions based on an understanding of the access patterns and i/o -characteristics - -ii. 
High performance filesystems or other higher level kernel code's -capabilities - -Kernel components like filesystems could also take their own i/o scheduling -decisions for optimizing performance. Journalling filesystems may need -some control over i/o ordering. - -What kind of support exists at the generic block layer for this ? - -The flags and rw fields in the bio structure can be used for some tuning -from above e.g indicating that an i/o is just a readahead request, or priority -settings (currently unused). As far as user applications are concerned they -would need an additional mechanism either via open flags or ioctls, or some -other upper level mechanism to communicate such settings to block. - -1.2.1 Request Priority/Latency -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Todo/Under discussion:: - - Arjan's proposed request priority scheme allows higher levels some broad - control (high/med/low) over the priority of an i/o request vs other pending - requests in the queue. For example it allows reads for bringing in an - executable page on demand to be given a higher priority over pending write - requests which haven't aged too much on the queue. Potentially this priority - could even be exposed to applications in some manner, providing higher level - tunability. Time based aging avoids starvation of lower priority - requests. Some bits in the bi_opf flags field in the bio structure are - intended to be used for this priority information. - - -1.3 Direct Access to Low level Device/Driver Capabilities (Bypass mode) ------------------------------------------------------------------------ - -(e.g Diagnostics, Systems Management) - -There are situations where high-level code needs to have direct access to -the low level device capabilities or requires the ability to issue commands -to the device bypassing some of the intermediate i/o layers. 
-These could, for example, be special control commands issued through ioctl -interfaces, or could be raw read/write commands that stress the drive's -capabilities for certain kinds of fitness tests. Having direct interfaces at -multiple levels without having to pass through upper layers makes -it possible to perform bottom up validation of the i/o path, layer by -layer, starting from the media. - -The normal i/o submission interfaces, e.g submit_bio, could be bypassed -for specially crafted requests which such ioctl or diagnostics -interfaces would typically use, and the elevator add_request routine -can instead be used to directly insert such requests in the queue or preferably -the blk_do_rq routine can be used to place the request on the queue and -wait for completion. Alternatively, sometimes the caller might just -invoke a lower level driver specific interface with the request as a -parameter. - -If the request is a means for passing on special information associated with -the command, then such information is associated with the request->special -field (rather than misuse the request->buffer field which is meant for the -request data buffer's virtual mapping). - -For passing request data, the caller must build up a bio descriptor -representing the concerned memory buffer if the underlying driver interprets -bio segments or uses the block layer end*request* functions for i/o -completion. Alternatively one could directly use the request->buffer field to -specify the virtual address of the buffer, if the driver expects buffer -addresses passed in this way and ignores bio entries for the request type -involved. In the latter case, the driver would modify and manage the -request->buffer, request->sector and request->nr_sectors or -request->current_nr_sectors fields itself rather than using the block layer -end_request or end_that_request_first completion interfaces. 
-(See 2.3 or Documentation/block/request.rst for a brief explanation of -the request structure fields) - -:: - - [TBD: end_that_request_last should be usable even in this case; - Perhaps an end_that_direct_request_first routine could be implemented to make - handling direct requests easier for such drivers; Also for drivers that - expect bios, a helper function could be provided for setting up a bio - corresponding to a data buffer] - - - - - -1.3.1 Pre-built Commands -^^^^^^^^^^^^^^^^^^^^^^^^ - -A request can be created with a pre-built custom command to be sent directly -to the device. The cmd block in the request structure has room for filling -in the command bytes. (i.e rq->cmd is now 16 bytes in size, and meant for -command pre-building, and the type of the request is now indicated -through rq->flags instead of via rq->cmd) - -The request structure flags can be set up to indicate the type of request -in such cases (REQ_PC: direct packet command passed to driver, REQ_BLOCK_PC: -packet command issued via blk_do_rq, REQ_SPECIAL: special request). - -It can help to pre-build device commands for requests in advance. -Drivers can now specify a request prepare function (q->prep_rq_fn) that the -block layer would invoke to pre-build device commands for a given request, -or perform other preparatory processing for the request. This is routine is -called by elv_next_request(), i.e. typically just before servicing a request. -(The prepare function would not be called for requests that have RQF_DONTPREP -enabled) - -Aside: - Pre-building could possibly even be done early, i.e before placing the - request on the queue, rather than construct the command on the fly in the - driver while servicing the request queue when it may affect latencies in - interrupt context or responsiveness in general. One way to add early - pre-building would be to do it whenever we fail to merge on a request. 
- Now REQ_NOMERGE is set in the request flags to skip this one in the future, - which means that it will not change before we feed it to the device. So - the pre-builder hook can be invoked there. - - -2. Flexible and generic but minimalist i/o structure/descriptor -=============================================================== - -2.1 Reason for a new structure and requirements addressed ---------------------------------------------------------- - -Prior to 2.5, buffer heads were used as the unit of i/o at the generic block -layer, and the low level request structure was associated with a chain of -buffer heads for a contiguous i/o request. This led to certain inefficiencies -when it came to large i/o requests and readv/writev style operations, as it -forced such requests to be broken up into small chunks before being passed -on to the generic block layer, only to be merged by the i/o scheduler -when the underlying device was capable of handling the i/o in one shot. -Also, using the buffer head as an i/o structure for i/os that didn't originate -from the buffer cache unnecessarily added to the weight of the descriptors -which were generated for each such chunk. - -The following were some of the goals and expectations considered in the -redesign of the block i/o data structure in 2.5. - -1. Should be appropriate as a descriptor for both raw and buffered i/o - - avoid cache related fields which are irrelevant in the direct/page i/o path, - or filesystem block size alignment restrictions which may not be relevant - for raw i/o. -2. Ability to represent high-memory buffers (which do not have a virtual - address mapping in kernel address space). -3. Ability to represent large i/os w/o unnecessarily breaking them up (i.e - greater than PAGE_SIZE chunks in one shot) -4. At the same time, ability to retain independent identity of i/os from - different sources or i/o units requiring individual completion (e.g. for - latency reasons) -5. 
Ability to represent an i/o involving multiple physical memory segments - (including non-page aligned page fragments, as specified via readv/writev) - without unnecessarily breaking it up, if the underlying device is capable of - handling it. -6. Preferably should be based on a memory descriptor structure that can be - passed around different types of subsystems or layers, maybe even - networking, without duplication or extra copies of data/descriptor fields - themselves in the process -7. Ability to handle the possibility of splits/merges as the structure passes - through layered drivers (lvm, md, evms), with minimal overhead. - -The solution was to define a new structure (bio) for the block layer, -instead of using the buffer head structure (bh) directly, the idea being -avoidance of some associated baggage and limitations. The bio structure -is uniformly used for all i/o at the block layer ; it forms a part of the -bh structure for buffered i/o, and in the case of raw/direct i/o kiobufs are -mapped to bio structures. - -2.2 The bio struct ------------------- - -The bio structure uses a vector representation pointing to an array of tuples -of to describe the i/o buffer, and has various other -fields describing i/o parameters and state that needs to be maintained for -performing the i/o. - -Notice that this representation means that a bio has no virtual address -mapping at all (unlike buffer heads). 
- -:: - - struct bio_vec { - struct page *bv_page; - unsigned short bv_len; - unsigned short bv_offset; - }; - - /* - * main unit of I/O for the block layer and lower layers (ie drivers) - */ - struct bio { - struct bio *bi_next; /* request queue link */ - struct block_device *bi_bdev; /* target device */ - unsigned long bi_flags; /* status, command, etc */ - unsigned long bi_opf; /* low bits: r/w, high: priority */ - - unsigned int bi_vcnt; /* how may bio_vec's */ - struct bvec_iter bi_iter; /* current index into bio_vec array */ - - unsigned int bi_size; /* total size in bytes */ - unsigned short bi_hw_segments; /* segments after DMA remapping */ - unsigned int bi_max; /* max bio_vecs we can hold - used as index into pool */ - struct bio_vec *bi_io_vec; /* the actual vec list */ - bio_end_io_t *bi_end_io; /* bi_end_io (bio) */ - atomic_t bi_cnt; /* pin count: free when it hits zero */ - void *bi_private; - }; - -With this multipage bio design: - -- Large i/os can be sent down in one go using a bio_vec list consisting - of an array of fragments (similar to the way fragments - are represented in the zero-copy network code) -- Splitting of an i/o request across multiple devices (as in the case of - lvm or raid) is achieved by cloning the bio (where the clone points to - the same bi_io_vec array, but with the index and size accordingly modified) -- A linked list of bios is used as before for unrelated merges [#]_ - this - avoids reallocs and makes independent completions easier to handle. -- Code that traverses the req list can find all the segments of a bio - by using rq_for_each_segment. This handles the fact that a request - has multiple bios, each of which can have multiple segments. -- Drivers which can't process a large bio in one shot can use the bi_iter - field to keep track of the next bio_vec entry to process. 
- (e.g a 1MB bio_vec needs to be handled in max 128kB chunks for IDE) - [TBD: Should preferably also have a bi_voffset and bi_vlen to avoid modifying - bi_offset an len fields] - -.. [#] - - unrelated merges -- a request ends up containing two or more bios that - didn't originate from the same place. - -bi_end_io() i/o callback gets called on i/o completion of the entire bio. - -At a lower level, drivers build a scatter gather list from the merged bios. -The scatter gather list is in the form of an array of -entries with their corresponding dma address mappings filled in at the -appropriate time. As an optimization, contiguous physical pages can be -covered by a single entry where refers to the first page and -covers the range of pages (up to 16 contiguous pages could be covered this -way). There is a helper routine (blk_rq_map_sg) which drivers can use to build -the sg list. - -Note: Right now the only user of bios with more than one page is ll_rw_kio, -which in turn means that only raw I/O uses it (direct i/o may not work -right now). The intent however is to enable clustering of pages etc to -become possible. The pagebuf abstraction layer from SGI also uses multi-page -bios, but that is currently not included in the stock development kernels. -The same is true of Andrew Morton's work-in-progress multipage bio writeout -and readahead patches. - -2.3 Changes in the Request Structure ------------------------------------- - -The request structure is the structure that gets passed down to low level -drivers. The block layer make_request function builds up a request structure, -places it on the queue and invokes the drivers request_fn. The driver makes -use of block layer helper routine elv_next_request to pull the next request -off the queue. Control or diagnostic functions might bypass block and directly -invoke underlying driver entry points passing in a specially constructed -request structure. 
- -Only some relevant fields (mainly those which changed or may be referred -to in some of the discussion here) are listed below, not necessarily in -the order in which they occur in the structure (see include/linux/blkdev.h) -Refer to Documentation/block/request.rst for details about all the request -structure fields and a quick reference about the layers which are -supposed to use or modify those fields:: - - struct request { - struct list_head queuelist; /* Not meant to be directly accessed by - the driver. - Used by q->elv_next_request_fn - rq->queue is gone - */ - . - . - unsigned char cmd[16]; /* prebuilt command data block */ - unsigned long flags; /* also includes earlier rq->cmd settings */ - . - . - sector_t sector; /* this field is now of type sector_t instead of int - preparation for 64 bit sectors */ - . - . - - /* Number of scatter-gather DMA addr+len pairs after - * physical address coalescing is performed. - */ - unsigned short nr_phys_segments; - - /* Number of scatter-gather addr+len pairs after - * physical and DMA remapping hardware coalescing is performed. - * This is the number of scatter-gather entries the driver - * will actually have to deal with after DMA mapping is done. - */ - unsigned short nr_hw_segments; - - /* Various sector counts */ - unsigned long nr_sectors; /* no. of sectors left: driver modifiable */ - unsigned long hard_nr_sectors; /* block internal copy of above */ - unsigned int current_nr_sectors; /* no. of sectors left in the - current segment:driver modifiable */ - unsigned long hard_cur_sectors; /* block internal copy of the above */ - . - . - int tag; /* command tag associated with request */ - void *special; /* same as before */ - char *buffer; /* valid only for low memory buffers up to - current_nr_sectors */ - . - . - struct bio *bio, *biotail; /* bio list instead of bh */ - struct request_list *rl; - } - -See the req_ops and req_flag_bits definitions for an explanation of the various -flags available. 
Some bits are used by the block layer or i/o scheduler. - -The behaviour of the various sector counts are almost the same as before, -except that since we have multi-segment bios, current_nr_sectors refers -to the numbers of sectors in the current segment being processed which could -be one of the many segments in the current bio (i.e i/o completion unit). -The nr_sectors value refers to the total number of sectors in the whole -request that remain to be transferred (no change). The purpose of the -hard_xxx values is for block to remember these counts every time it hands -over the request to the driver. These values are updated by block on -end_that_request_first, i.e. every time the driver completes a part of the -transfer and invokes block end*request helpers to mark this. The -driver should not modify these values. The block layer sets up the -nr_sectors and current_nr_sectors fields (based on the corresponding -hard_xxx values and the number of bytes transferred) and updates it on -every transfer that invokes end_that_request_first. It does the same for the -buffer, bio, bio->bi_iter fields too. - -The buffer field is just a virtual address mapping of the current segment -of the i/o buffer in cases where the buffer resides in low-memory. For high -memory i/o, this field is not valid and must not be used by drivers. - -Code that sets up its own request structures and passes them down to -a driver needs to be careful about interoperation with the block layer helper -functions which the driver uses. (Section 1.3) - -3. Using bios -============= - -3.1 Setup/Teardown ------------------- - -There are routines for managing the allocation, and reference counting, and -freeing of bios (bio_alloc, bio_get, bio_put). - -This makes use of Ingo Molnar's mempool implementation, which enables -subsystems like bio to maintain their own reserve memory pools for guaranteed -deadlock-free allocations during extreme VM load. 
For example, the VM -subsystem makes use of the block layer to writeout dirty pages in order to be -able to free up memory space, a case which needs careful handling. The -allocation logic draws from the preallocated emergency reserve in situations -where it cannot allocate through normal means. If the pool is empty and it -can wait, then it would trigger action that would help free up memory or -replenish the pool (without deadlocking) and wait for availability in the pool. -If it is in IRQ context, and hence not in a position to do this, allocation -could fail if the pool is empty. In general mempool always first tries to -perform allocation without having to wait, even if it means digging into the -pool as long it is not less that 50% full. - -On a free, memory is released to the pool or directly freed depending on -the current availability in the pool. The mempool interface lets the -subsystem specify the routines to be used for normal alloc and free. In the -case of bio, these routines make use of the standard slab allocator. - -The caller of bio_alloc is expected to taken certain steps to avoid -deadlocks, e.g. avoid trying to allocate more memory from the pool while -already holding memory obtained from the pool. - -:: - - [TBD: This is a potential issue, though a rare possibility - in the bounce bio allocation that happens in the current code, since - it ends up allocating a second bio from the same pool while - holding the original bio ] - -Memory allocated from the pool should be released back within a limited -amount of time (in the case of bio, that would be after the i/o is completed). -This ensures that if part of the pool has been used up, some work (in this -case i/o) must already be in progress and memory would be available when it -is over. If allocating from multiple pools in the same code path, the order -or hierarchy of allocation needs to be consistent, just the way one deals -with multiple locks. 
- -The bio_alloc routine also needs to allocate the bio_vec_list (bvec_alloc()) -for a non-clone bio. There are the 6 pools setup for different size biovecs, -so bio_alloc(gfp_mask, nr_iovecs) will allocate a vec_list of the -given size from these slabs. - -The bio_get() routine may be used to hold an extra reference on a bio prior -to i/o submission, if the bio fields are likely to be accessed after the -i/o is issued (since the bio may otherwise get freed in case i/o completion -happens in the meantime). - -3.2 Generic bio helper Routines -------------------------------- - -3.2.1 Traversing segments and completion units in a request -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The macro rq_for_each_segment() should be used for traversing the bios -in the request list (drivers should avoid directly trying to do it -themselves). Using these helpers should also make it easier to cope -with block changes in the future. - -:: - - struct req_iterator iter; - rq_for_each_segment(bio_vec, rq, iter) - /* bio_vec is now current segment */ - -I/O completion callbacks are per-bio rather than per-segment, so drivers -that traverse bio chains on completion need to keep that in mind. Drivers -which don't make a distinction between segments and completion units would -need to be reorganized to support multi-segment bios. - -3.2.2 Setting up DMA scatterlists -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The blk_rq_map_sg() helper routine would be used for setting up scatter -gather lists from a request, so a driver need not do it on its own. - - nr_segments = blk_rq_map_sg(q, rq, scatterlist); - -The helper routine provides a level of abstraction which makes it easier -to modify the internals of request to scatterlist conversion down the line -without breaking drivers. 
The blk_rq_map_sg routine takes care of several -things like collapsing physically contiguous segments (if QUEUE_FLAG_CLUSTER -is set) and correct segment accounting to avoid exceeding the limits which -the i/o hardware can handle, based on various queue properties. - -- Prevents a clustered segment from crossing a 4GB mem boundary -- Avoids building segments that would exceed the number of physical - memory segments that the driver can handle (phys_segments) and the - number that the underlying hardware can handle at once, accounting for - DMA remapping (hw_segments) (i.e. IOMMU aware limits). - -Routines which the low level driver can use to set up the segment limits: - -blk_queue_max_hw_segments() : Sets an upper limit of the maximum number of -hw data segments in a request (i.e. the maximum number of address/length -pairs the host adapter can actually hand to the device at once) - -blk_queue_max_phys_segments() : Sets an upper limit on the maximum number -of physical data segments in a request (i.e. the largest sized scatter list -a driver could handle) - -3.2.3 I/O completion -^^^^^^^^^^^^^^^^^^^^ - -The existing generic block layer helper routines end_request, -end_that_request_first and end_that_request_last can be used for i/o -completion (and setting things up so the rest of the i/o or the next -request can be kicked of) as before. With the introduction of multi-page -bio support, end_that_request_first requires an additional argument indicating -the number of sectors completed. - -3.2.4 Implications for drivers that do not interpret bios -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -(don't handle multiple segments) - -Drivers that do not interpret bios e.g those which do not handle multiple -segments and do not support i/o into high memory addresses (require bounce -buffers) and expect only virtually mapped buffers, can access the rq->buffer -field. 
As before the driver should use current_nr_sectors to determine the -size of remaining data in the current segment (that is the maximum it can -transfer in one go unless it interprets segments), and rely on the block layer -end_request, or end_that_request_first/last to take care of all accounting -and transparent mapping of the next bio segment when a segment boundary -is crossed on completion of a transfer. (The end*request* functions should -be used if only if the request has come down from block/bio path, not for -direct access requests which only specify rq->buffer without a valid rq->bio) - -3.3 I/O Submission ------------------- - -The routine submit_bio() is used to submit a single io. Higher level i/o -routines make use of this: - -(a) Buffered i/o: - -The routine submit_bh() invokes submit_bio() on a bio corresponding to the -bh, allocating the bio if required. ll_rw_block() uses submit_bh() as before. - -(b) Kiobuf i/o (for raw/direct i/o): - -The ll_rw_kio() routine breaks up the kiobuf into page sized chunks and -maps the array to one or more multi-page bios, issuing submit_bio() to -perform the i/o on each of these. - -The embedded bh array in the kiobuf structure has been removed and no -preallocation of bios is done for kiobufs. [The intent is to remove the -blocks array as well, but it's currently in there to kludge around direct i/o.] -Thus kiobuf allocation has switched back to using kmalloc rather than vmalloc. - -Todo/Observation: - - A single kiobuf structure is assumed to correspond to a contiguous range - of data, so brw_kiovec() invokes ll_rw_kio for each kiobuf in a kiovec. - So right now it wouldn't work for direct i/o on non-contiguous blocks. - This is to be resolved. The eventual direction is to replace kiobuf - by kvec's. - - Badari Pulavarty has a patch to implement direct i/o correctly using - bio and kvec. 
- - -(c) Page i/o: - -Todo/Under discussion: - - Andrew Morton's multi-page bio patches attempt to issue multi-page - writeouts (and reads) from the page cache, by directly building up - large bios for submission completely bypassing the usage of buffer - heads. This work is still in progress. - - Christoph Hellwig had some code that uses bios for page-io (rather than - bh). This isn't included in bio as yet. Christoph was also working on a - design for representing virtual/real extents as an entity and modifying - some of the address space ops interfaces to utilize this abstraction rather - than buffer_heads. (This is somewhat along the lines of the SGI XFS pagebuf - abstraction, but intended to be as lightweight as possible). - -(d) Direct access i/o: - -Direct access requests that do not contain bios would be submitted differently -as discussed earlier in section 1.3. - -Aside: - - Kvec i/o: - - Ben LaHaise's aio code uses a slightly different structure instead - of kiobufs, called a kvec_cb. This contains an array of - tuples (very much like the networking code), together with a callback function - and data pointer. This is embedded into a brw_cb structure when passed - to brw_kvec_async(). - - Now it should be possible to directly map these kvecs to a bio. Just as while - cloning, in this case rather than PRE_BUILT bio_vecs, we set the bi_io_vec - array pointer to point to the veclet array in kvecs. - - TBD: In order for this to work, some changes are needed in the way multi-page - bios are handled today. The values of the tuples in such a vector passed in - from higher level code should not be modified by the block layer in the course - of its request processing, since that would make it hard for the higher layer - to continue to use the vector descriptor (kvec) after i/o completes. Instead, - all such transient state should either be maintained in the request structure, - and passed on in some way to the endio completion routine. - - -4. 
The I/O scheduler -==================== - -I/O scheduler, a.k.a. elevator, is implemented in two layers. Generic dispatch -queue and specific I/O schedulers. Unless stated otherwise, elevator is used -to refer to both parts and I/O scheduler to specific I/O schedulers. - -Block layer implements generic dispatch queue in `block/*.c`. -The generic dispatch queue is responsible for requeueing, handling non-fs -requests and all other subtleties. - -Specific I/O schedulers are responsible for ordering normal filesystem -requests. They can also choose to delay certain requests to improve -throughput or whatever purpose. As the plural form indicates, there are -multiple I/O schedulers. They can be built as modules but at least one should -be built inside the kernel. Each queue can choose different one and can also -change to another one dynamically. - -A block layer call to the i/o scheduler follows the convention elv_xxx(). This -calls elevator_xxx_fn in the elevator switch (block/elevator.c). Oh, xxx -and xxx might not match exactly, but use your imagination. If an elevator -doesn't implement a function, the switch does nothing or some minimal house -keeping work. - -4.1. I/O scheduler API ----------------------- - -The functions an elevator may implement are: (* are mandatory) - -=============================== ================================================ -elevator_merge_fn called to query requests for merge with a bio - -elevator_merge_req_fn called when two requests get merged. the one - which gets merged into the other one will be - never seen by I/O scheduler again. IOW, after - being merged, the request is gone. - -elevator_merged_fn called when a request in the scheduler has been - involved in a merge. It is used in the deadline - scheduler for example, to reposition the request - if its sorting order has changed. - -elevator_allow_merge_fn called whenever the block layer determines - that a bio can be merged into an existing - request safely. 
The io scheduler may still - want to stop a merge at this point if it - results in some sort of conflict internally, - this hook allows it to do that. Note however - that two *requests* can still be merged at later - time. Currently the io scheduler has no way to - prevent that. It can only learn about the fact - from elevator_merge_req_fn callback. - -elevator_dispatch_fn* fills the dispatch queue with ready requests. - I/O schedulers are free to postpone requests by - not filling the dispatch queue unless @force - is non-zero. Once dispatched, I/O schedulers - are not allowed to manipulate the requests - - they belong to generic dispatch queue. - -elevator_add_req_fn* called to add a new request into the scheduler - -elevator_former_req_fn -elevator_latter_req_fn These return the request before or after the - one specified in disk sort order. Used by the - block layer to find merge possibilities. - -elevator_completed_req_fn called when a request is completed. - -elevator_set_req_fn -elevator_put_req_fn Must be used to allocate and free any elevator - specific storage for a request. - -elevator_activate_req_fn Called when device driver first sees a request. - I/O schedulers can use this callback to - determine when actual execution of a request - starts. -elevator_deactivate_req_fn Called when device driver decides to delay - a request by requeueing it. - -elevator_init_fn* -elevator_exit_fn Allocate and free any elevator specific storage - for a queue. -=============================== ================================================ - -4.2 Request flows seen by I/O schedulers ----------------------------------------- - -All requests seen by I/O schedulers strictly follow one of the following three -flows. - - set_req_fn -> - - i. add_req_fn -> (merged_fn ->)* -> dispatch_fn -> activate_req_fn -> - (deactivate_req_fn -> activate_req_fn ->)* -> completed_req_fn - ii. add_req_fn -> (merged_fn ->)* -> merge_req_fn - iii. 
[none] - - -> put_req_fn - -4.3 I/O scheduler implementation --------------------------------- - -The generic i/o scheduler algorithm attempts to sort/merge/batch requests for -optimal disk scan and request servicing performance (based on generic -principles and device capabilities), optimized for: - -i. improved throughput -ii. improved latency -iii. better utilization of h/w & CPU time - -Characteristics: - -i. Binary tree -AS and deadline i/o schedulers use red black binary trees for disk position -sorting and searching, and a fifo linked list for time-based searching. This -gives good scalability and good availability of information. Requests are -almost always dispatched in disk sort order, so a cache is kept of the next -request in sort order to prevent binary tree lookups. - -This arrangement is not a generic block layer characteristic however, so -elevators may implement queues as they please. - -ii. Merge hash -AS and deadline use a hash table indexed by the last sector of a request. This -enables merging code to quickly look up "back merge" candidates, even when -multiple I/O streams are being performed at once on one disk. - -"Front merges", a new request being merged at the front of an existing request, -are far less common than "back merges" due to the nature of most I/O patterns. -Front merges are handled by the binary trees in AS and deadline schedulers. - -iii. Plugging the queue to batch requests in anticipation of opportunities for - merge/sort optimizations - -Plugging is an approach that the current i/o scheduling algorithm resorts to so -that it collects up enough requests in the queue to be able to take -advantage of the sorting/merging logic in the elevator. If the -queue is empty when a request comes in, then it plugs the request queue -(sort of like plugging the bath tub of a vessel to get fluid to build up) -till it fills up with a few more requests, before starting to service -the requests. 
This provides an opportunity to merge/sort the requests before -passing them down to the device. There are various conditions when the queue is -unplugged (to open up the flow again), either through a scheduled task or -could be on demand. - -Aside: - This is kind of controversial territory, as it's not clear if plugging is - always the right thing to do. Devices typically have their own queues, - and allowing a big queue to build up in software, while letting the device be - idle for a while may not always make sense. The trick is to handle the fine - balance between when to plug and when to open up. Also now that we have - multi-page bios being queued in one shot, we may not need to wait to merge - a big request from the broken up pieces coming by. - -4.4 I/O contexts ----------------- - -I/O contexts provide a dynamically allocated per process data area. They may -be used in I/O schedulers, and in the block layer (could be used for IO statis, -priorities for example). See `*io_context` in block/ll_rw_blk.c, and as-iosched.c -for an example of usage in an i/o scheduler. - - -5. Scalability related changes -============================== - -5.1 Granular Locking: io_request_lock replaced by a per-queue lock ------------------------------------------------------------------- - -The global io_request_lock has been removed as of 2.5, to avoid -the scalability bottleneck it was causing, and has been replaced by more -granular locking. The request queue structure has a pointer to the -lock to be used for that queue. As a result, locking can now be -per-queue, with a provision for sharing a lock across queues if -necessary (e.g the scsi layer sets the queue lock pointers to the -corresponding adapter lock, which results in a per host locking -granularity). The locking semantics are the same, i.e. locking is -still imposed by the block layer, grabbing the lock before -request_fn execution which it means that lots of older drivers -should still be SMP safe. 
Drivers are free to drop the queue -lock themselves, if required. Drivers that explicitly used the -io_request_lock for serialization need to be modified accordingly. -Usually it's as easy as adding a global lock:: - - static DEFINE_SPINLOCK(my_driver_lock); - -and passing the address to that lock to blk_init_queue(). - -5.2 64 bit sector numbers (sector_t prepares for 64 bit support) ----------------------------------------------------------------- - -The sector number used in the bio structure has been changed to sector_t, -which could be defined as 64 bit in preparation for 64 bit sector support. - -6. Other Changes/Implications -============================= - -6.1 Partition re-mapping handled by the generic block layer ------------------------------------------------------------ - -In 2.5 some of the gendisk/partition related code has been reorganized. -Now the generic block layer performs partition-remapping early and thus -provides drivers with a sector number relative to whole device, rather than -having to take partition number into account in order to arrive at the true -sector number. The routine blk_partition_remap() is invoked by -submit_bio_noacct even before invoking the queue specific ->submit_bio, -so the i/o scheduler also gets to operate on whole disk sector numbers. This -should typically not require changes to block drivers, it just never gets -to invoke its own partition sector offset calculations since all bios -sent are offset from the beginning of the device. - - -7. A Few Tips on Migration of older drivers -=========================================== - -Old-style drivers that just use CURRENT and ignores clustered requests, -may not need much change. The generic layer will automatically handle -clustered requests, multi-page bios, etc for the driver. - -For a low performance driver or hardware that is PIO driven or just doesn't -support scatter-gather changes should be minimal too. 
- -The following are some points to keep in mind when converting old drivers -to bio. - -Drivers should use elv_next_request to pick up requests and are no longer -supposed to handle looping directly over the request list. -(struct request->queue has been removed) - -Now end_that_request_first takes an additional number_of_sectors argument. -It used to handle always just the first buffer_head in a request, now -it will loop and handle as many sectors (on a bio-segment granularity) -as specified. - -Now bh->b_end_io is replaced by bio->bi_end_io, but most of the time the -right thing to use is bio_endio(bio) instead. - -If the driver is dropping the io_request_lock from its request_fn strategy, -then it just needs to replace that with q->queue_lock instead. - -As described in Sec 1.1, drivers can set max sector size, max segment size -etc per queue now. Drivers that used to define their own merge functions i -to handle things like this can now just use the blk_queue_* functions at -blk_init_queue time. - -Drivers no longer have to map a {partition, sector offset} into the -correct absolute location anymore, this is done by the block layer, so -where a driver received a request ala this before:: - - rq->rq_dev = mk_kdev(3, 5); /* /dev/hda5 */ - rq->sector = 0; /* first sector on hda5 */ - -it will now see:: - - rq->rq_dev = mk_kdev(3, 0); /* /dev/hda */ - rq->sector = 123128; /* offset from start of disk */ - -As mentioned, there is no virtual mapping of a bio. For DMA, this is -not a problem as the driver probably never will need a virtual mapping. -Instead it needs a bus mapping (dma_map_page for a single segment or -use dma_map_sg for scatter gather) to be able to ship it to the driver. For -PIO drivers (or drivers that need to revert to PIO transfer once in a -while (IDE for example)), where the CPU is doing the actual data -transfer a virtual mapping is needed. 
If the driver supports highmem I/O, -(Sec 1.1, (ii) ) it needs to use kmap_atomic or similar to temporarily map -a bio into the virtual address space. - - -8. Prior/Related/Impacted patches -================================= - -8.1. Earlier kiobuf patches (sct/axboe/chait/hch/mkp) ------------------------------------------------------ - -- orig kiobuf & raw i/o patches (now in 2.4 tree) -- direct kiobuf based i/o to devices (no intermediate bh's) -- page i/o using kiobuf -- kiobuf splitting for lvm (mkp) -- elevator support for kiobuf request merging (axboe) - -8.2. Zero-copy networking (Dave Miller) ---------------------------------------- - -8.3. SGI XFS - pagebuf patches - use of kiobufs ------------------------------------------------ -8.4. Multi-page pioent patch for bio (Christoph Hellwig) --------------------------------------------------------- -8.5. Direct i/o implementation (Andrea Arcangeli) since 2.4.10-pre11 --------------------------------------------------------------------- -8.6. Async i/o implementation patch (Ben LaHaise) -------------------------------------------------- -8.7. EVMS layering design (IBM EVMS team) ------------------------------------------ -8.8. Larger page cache size patch (Ben LaHaise) and Large page size (Daniel Phillips) -------------------------------------------------------------------------------------- - - => larger contiguous physical memory buffers - -8.9. VM reservations patch (Ben LaHaise) ----------------------------------------- -8.10. Write clustering patches ? (Marcelo/Quintela/Riel ?) ----------------------------------------------------------- -8.11. Block device in page cache patch (Andrea Archangeli) - now in 2.4.10+ ---------------------------------------------------------------------------- -8.12. 
Multiple block-size transfers for faster raw i/o (Shailabh Nagar, Badari) -------------------------------------------------------------------------------- -8.13 Priority based i/o scheduler - prepatches (Arjan van de Ven) ------------------------------------------------------------------- -8.14 IDE Taskfile i/o patch (Andre Hedrick) --------------------------------------------- -8.15 Multi-page writeout and readahead patches (Andrew Morton) ---------------------------------------------------------------- -8.16 Direct i/o patches for 2.5 using kvec and bio (Badari Pulavarthy) ------------------------------------------------------------------------ - -9. Other References -=================== - -9.1 The Splice I/O Model ------------------------- - -Larry McVoy (and subsequent discussions on lkml, and Linus' comments - Jan 2001 - -9.2 Discussions about kiobuf and bh design ------------------------------------------- - -On lkml between sct, linus, alan et al - Feb-March 2001 (many of the -initial thoughts that led to bio were brought up in this discussion thread) - -9.3 Discussions on mempool on lkml - Dec 2001. ----------------------------------------------- diff --git a/Documentation/block/index.rst b/Documentation/block/index.rst index 3a41495dd77b5..68f115f2b1c6b 100644 --- a/Documentation/block/index.rst +++ b/Documentation/block/index.rst @@ -8,7 +8,6 @@ Block :maxdepth: 1 bfq-iosched - biodoc biovecs blk-mq capability -- GitLab From ec9fd2a13d743ae129bd1b5c6edea0d65260ce10 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 15 Feb 2022 03:52:47 -0800 Subject: [PATCH 0411/1586] blk-lib: don't check bdev_get_queue() NULL check Based on the comment present in the bdev_get_queue() bdev->bd_queue can never be NULL. Remove the NULL check for the local variable q that is set from bdev_get_queue() for discard, write_same, and write_zeroes. 
Signed-off-by: Chaitanya Kulkarni Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20220215115247.11717-2-kch@nvidia.com Signed-off-by: Jens Axboe --- block/blk-lib.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/block/blk-lib.c b/block/blk-lib.c index 1b8ced45e4e55..fc6ea52e74824 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -19,9 +19,6 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, unsigned int op; sector_t bs_mask, part_offset = 0; - if (!q) - return -ENXIO; - if (bdev_read_only(bdev)) return -EPERM; @@ -156,9 +153,6 @@ static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector, struct bio *bio = *biop; sector_t bs_mask; - if (!q) - return -ENXIO; - if (bdev_read_only(bdev)) return -EPERM; @@ -232,10 +226,6 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev, { struct bio *bio = *biop; unsigned int max_write_zeroes_sectors; - struct request_queue *q = bdev_get_queue(bdev); - - if (!q) - return -ENXIO; if (bdev_read_only(bdev)) return -EPERM; @@ -284,14 +274,10 @@ static int __blkdev_issue_zero_pages(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct bio **biop) { - struct request_queue *q = bdev_get_queue(bdev); struct bio *bio = *biop; int bi_size = 0; unsigned int sz; - if (!q) - return -ENXIO; - if (bdev_read_only(bdev)) return -EPERM; -- GitLab From b8ac4ee08d48d4bb46669a2deef10454313e1a00 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 7 Jan 2022 16:00:55 +0000 Subject: [PATCH 0412/1586] arm64: booting.rst: Clarify on requiring non-secure EL2 The ARMv8.4 architecture revision introduced the EL2 exception level to the secure world. Clarify the existing wording to make sure that Linux relies on being executed in the non-secure state. 
Signed-off-by: Andre Przywara Reviewed-by: Mark Rutland Link: https://lore.kernel.org/r/20220107160056.322141-2-andre.przywara@arm.com Signed-off-by: Will Deacon --- Documentation/arm64/booting.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/arm64/booting.rst b/Documentation/arm64/booting.rst index 52d060caf8bba..29884b261aa9c 100644 --- a/Documentation/arm64/booting.rst +++ b/Documentation/arm64/booting.rst @@ -10,9 +10,9 @@ This document is based on the ARM booting document by Russell King and is relevant to all public releases of the AArch64 Linux kernel. The AArch64 exception model is made up of a number of exception levels -(EL0 - EL3), with EL0 and EL1 having a secure and a non-secure -counterpart. EL2 is the hypervisor level and exists only in non-secure -mode. EL3 is the highest priority level and exists only in secure mode. +(EL0 - EL3), with EL0, EL1 and EL2 having a secure and a non-secure +counterpart. EL2 is the hypervisor level, EL3 is the highest priority +level and exists only in secure mode. Both are architecturally optional. For the purposes of this document, we will use the term `boot loader` simply to define all software that executes on the CPU(s) before control @@ -167,8 +167,8 @@ Before jumping into the kernel, the following conditions must be met: All forms of interrupts must be masked in PSTATE.DAIF (Debug, SError, IRQ and FIQ). - The CPU must be in either EL2 (RECOMMENDED in order to have access to - the virtualisation extensions) or non-secure EL1. + The CPU must be in non-secure state, either in EL2 (RECOMMENDED in order + to have access to the virtualisation extensions), or in EL1. 
- Caches, MMUs -- GitLab From 35bde68bba5413592d88864eced79f8a0482bb4f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 13 Jan 2022 14:12:39 +0100 Subject: [PATCH 0413/1586] arm64: random: implement arch_get_random_int/_long based on RNDR When support for RNDR/RNDRRS was introduced, we elected to only implement arch_get_random_seed_int/_long(), and back them by RNDR instead of RNDRRS. This was needed to prevent potential performance and/or starvation issues resulting from the fact that the /dev/random driver used to invoke these routines on various hot paths. These issues have all been addressed now [0] [1], and so we can wire up this API more straight-forwardly: - map arch_get_random_int/_long() onto RNDR, which returns the output of a DRBG that is reseeded at an implementation defined rate; - map arch_get_random_seed_int/_long() onto the TRNG firmware service, which returns true, conditioned entropy, or onto RNDRRS if the TRNG service is unavailable, which returns the output of a DRBG that is reseeded every time it is used. [0] 390596c9959c random: avoid arch_get_random_seed_long() when collecting IRQ randomness [1] 2ee25b6968b1 random: avoid superfluous call to RDRAND in CRNG extraction Cc: Andre Przywara Cc: Mark Brown Signed-off-by: Ard Biesheuvel Acked-by: Jason A. 
Donenfeld Reviewed-by: Andre Przywara Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20220113131239.1610455-1-ardb@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/archrandom.h | 45 +++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/archrandom.h b/arch/arm64/include/asm/archrandom.h index 09e43272ccb0a..d1bb5e71df256 100644 --- a/arch/arm64/include/asm/archrandom.h +++ b/arch/arm64/include/asm/archrandom.h @@ -42,13 +42,47 @@ static inline bool __arm64_rndr(unsigned long *v) return ok; } +static inline bool __arm64_rndrrs(unsigned long *v) +{ + bool ok; + + /* + * Reads of RNDRRS set PSTATE.NZCV to 0b0000 on success, + * and set PSTATE.NZCV to 0b0100 otherwise. + */ + asm volatile( + __mrs_s("%0", SYS_RNDRRS_EL0) "\n" + " cset %w1, ne\n" + : "=r" (*v), "=r" (ok) + : + : "cc"); + + return ok; +} + static inline bool __must_check arch_get_random_long(unsigned long *v) { + /* + * Only support the generic interface after we have detected + * the system wide capability, avoiding complexity with the + * cpufeature code and with potential scheduling between CPUs + * with and without the feature. + */ + if (cpus_have_const_cap(ARM64_HAS_RNG) && __arm64_rndr(v)) + return true; return false; } static inline bool __must_check arch_get_random_int(unsigned int *v) { + if (cpus_have_const_cap(ARM64_HAS_RNG)) { + unsigned long val; + + if (__arm64_rndr(&val)) { + *v = val; + return true; + } + } return false; } @@ -71,12 +105,11 @@ static inline bool __must_check arch_get_random_seed_long(unsigned long *v) } /* - * Only support the generic interface after we have detected - * the system wide capability, avoiding complexity with the - * cpufeature code and with potential scheduling between CPUs - * with and without the feature. + * RNDRRS is not backed by an entropy source but by a DRBG that is + * reseeded after each invocation. 
This is not a 100% fit but good + * enough to implement this API if no other entropy source exists. */ - if (cpus_have_const_cap(ARM64_HAS_RNG) && __arm64_rndr(v)) + if (cpus_have_const_cap(ARM64_HAS_RNG) && __arm64_rndrrs(v)) return true; return false; @@ -96,7 +129,7 @@ static inline bool __must_check arch_get_random_seed_int(unsigned int *v) } if (cpus_have_const_cap(ARM64_HAS_RNG)) { - if (__arm64_rndr(&val)) { + if (__arm64_rndrrs(&val)) { *v = val; return true; } -- GitLab From 3352a5556f52bb49b82c0258c0c67f7371ba1f80 Mon Sep 17 00:00:00 2001 From: He Ying Date: Tue, 11 Jan 2022 22:24:10 -0500 Subject: [PATCH 0414/1586] arm64: entry: Save some nops when CONFIG_ARM64_PSEUDO_NMI is not set Arm64 pseudo-NMI feature code brings some additional nops when CONFIG_ARM64_PSEUDO_NMI is not set, which is not necessary. So add necessary ifdeffery to avoid it. Signed-off-by: He Ying Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20220112032410.29231-1-heying24@huawei.com Signed-off-by: Will Deacon --- arch/arm64/kernel/entry.S | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 772ec2ecf4888..eb59621d6c6a2 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -300,6 +300,7 @@ alternative_else_nop_endif str w21, [sp, #S_SYSCALLNO] .endif +#ifdef CONFIG_ARM64_PSEUDO_NMI /* Save pmr */ alternative_if ARM64_HAS_IRQ_PRIO_MASKING mrs_s x20, SYS_ICC_PMR_EL1 @@ -307,6 +308,7 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING mov x20, #GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET msr_s SYS_ICC_PMR_EL1, x20 alternative_else_nop_endif +#endif /* Re-enable tag checking (TCO set on exception entry) */ #ifdef CONFIG_ARM64_MTE @@ -330,6 +332,7 @@ alternative_else_nop_endif disable_daif .endif +#ifdef CONFIG_ARM64_PSEUDO_NMI /* Restore pmr */ alternative_if ARM64_HAS_IRQ_PRIO_MASKING ldr x20, [sp, #S_PMR_SAVE] @@ -339,6 +342,7 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING dsb sy // Ensure priority change is seen by 
redistributor .L__skip_pmr_sync\@: alternative_else_nop_endif +#endif ldp x21, x22, [sp, #S_PC] // load ELR, SPSR -- GitLab From e921da6bc7cac5f0e8458fe5df18ae08eb538f54 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 25 Jan 2022 20:08:33 +0530 Subject: [PATCH 0415/1586] arm64/mm: Consolidate TCR_EL1 fields This renames and moves SYS_TCR_EL1_TCMA1 and SYS_TCR_EL1_TCMA0 definitions into pgtable-hwdef.h thus consolidating all TCR fields in a single header. This does not cause any functional change. Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Acked-by: Catalin Marinas Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/1643121513-21854-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable-hwdef.h | 2 ++ arch/arm64/include/asm/sysreg.h | 4 ---- arch/arm64/mm/proc.S | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 40085e53f573d..66671ff051835 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -273,6 +273,8 @@ #define TCR_NFD1 (UL(1) << 54) #define TCR_E0PD0 (UL(1) << 55) #define TCR_E0PD1 (UL(1) << 56) +#define TCR_TCMA0 (UL(1) << 57) +#define TCR_TCMA1 (UL(1) << 58) /* * TTBR. 
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 898bee0004aee..34800d264f690 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1101,10 +1101,6 @@ #define CPACR_EL1_ZEN_EL0EN (BIT(17)) /* enable EL0 access, if EL1EN set */ #define CPACR_EL1_ZEN (CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN) -/* TCR EL1 Bit Definitions */ -#define SYS_TCR_EL1_TCMA1 (BIT(58)) -#define SYS_TCR_EL1_TCMA0 (BIT(57)) - /* GCR_EL1 Definitions */ #define SYS_GCR_EL1_RRND (BIT(16)) #define SYS_GCR_EL1_EXCL_MASK 0xffffUL diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index d35c90d2e47ad..50bbed947bec7 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -46,7 +46,7 @@ #endif #ifdef CONFIG_KASAN_HW_TAGS -#define TCR_MTE_FLAGS SYS_TCR_EL1_TCMA1 | TCR_TBI1 | TCR_TBID1 +#define TCR_MTE_FLAGS TCR_TCMA1 | TCR_TBI1 | TCR_TBID1 #else /* * The mte_zero_clear_page_tags() implementation uses DC GZVA, which relies on -- GitLab From ee017ee353506fcec58e481673e4331ff198a80e Mon Sep 17 00:00:00 2001 From: Jianyong Wu Date: Tue, 1 Feb 2022 19:44:00 +0800 Subject: [PATCH 0416/1586] arm64/mm: avoid fixmap race condition when create pud mapping The 'fixmap' is a global resource and is used recursively by create pud mapping(), leading to a potential race condition in the presence of a concurrent call to alloc_init_pud(): kernel_init thread virtio-mem workqueue thread ================== =========================== alloc_init_pud(...) alloc_init_pud(...) pudp = pud_set_fixmap_offset(...) pudp = pud_set_fixmap_offset(...) READ_ONCE(*pudp) pud_clear_fixmap(...) READ_ONCE(*pudp) // CRASH! As kernel may sleep during creating pud mapping, introduce a mutex lock to serialise use of the fixmap entries by alloc_init_pud(). However, there is no need for locking in early boot stage and it doesn't work well with KASLR enabled when early boot. So, enable lock when system_state doesn't equal to "SYSTEM_BOOTING". 
Signed-off-by: Jianyong Wu Reviewed-by: Catalin Marinas Fixes: f4710445458c ("arm64: mm: use fixmap when creating page tables") Link: https://lore.kernel.org/r/20220201114400.56885-1-jianyong.wu@arm.com Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index acfae9b41cc8c..1681430ecab73 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -63,6 +63,7 @@ static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused; static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused; static DEFINE_SPINLOCK(swapper_pgdir_lock); +static DEFINE_MUTEX(fixmap_lock); void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd) { @@ -329,6 +330,12 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, } BUG_ON(p4d_bad(p4d)); + /* + * No need for locking during early boot. And it doesn't work as + * expected with KASLR enabled. + */ + if (system_state != SYSTEM_BOOTING) + mutex_lock(&fixmap_lock); pudp = pud_set_fixmap_offset(p4dp, addr); do { pud_t old_pud = READ_ONCE(*pudp); @@ -359,6 +366,8 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, } while (pudp++, addr = next, addr != end); pud_clear_fixmap(); + if (system_state != SYSTEM_BOOTING) + mutex_unlock(&fixmap_lock); } static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, -- GitLab From a6aab018829948c1818bed656656df9ae321408b Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 27 Jan 2022 16:21:27 +0000 Subject: [PATCH 0417/1586] arm64: insn: Generate 64 bit mask immediates correctly When the insn framework is used to encode an AND/ORR/EOR instruction, aarch64_encode_immediate() is used to pick the immr imms values. If the immediate is a 64bit mask, with bit 63 set, and zeros in any of the upper 32 bits, the immr value is incorrectly calculated meaning the wrong mask is generated. 
For example, 0x8000000000000001 should have an immr of 1, but 32 is used, meaning the resulting mask is 0x0000000300000000. It would appear eBPF is unable to hit these cases, as build_insn()'s imm value is a s32, so when used with BPF_ALU64, the sign-extended u64 immediate would always have all-1s or all-0s in the upper 32 bits. KVM does not generate a va_mask with any of the top bits set as these VA wouldn't be usable with TTBR0_EL2. This happens because the rotation is calculated from fls(~imm), which takes an unsigned int, but the immediate may be 64bit. Use fls64() so the 64bit mask doesn't get truncated to a u32. Signed-off-by: James Morse Brown-paper-bag-for: Marc Zyngier Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20220127162127.2391947-4-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/lib/insn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c index fccfe363e5679..e485cd735261c 100644 --- a/arch/arm64/lib/insn.c +++ b/arch/arm64/lib/insn.c @@ -1379,7 +1379,7 @@ static u32 aarch64_encode_immediate(u64 imm, * Compute the rotation to get a continuous set of * ones, with the first bit set at position 0 */ - ror = fls(~imm); + ror = fls64(~imm); } /* -- GitLab From 3673d4b9cf68164678c6bb8a380bfb9eebb49432 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 24 Jan 2022 17:17:48 +0000 Subject: [PATCH 0418/1586] kselftest/arm64: Remove local ARRAY_SIZE() definitions An ARRAY_SIZE() has been added to kselftest.h so remove the local versions in some of the arm64 selftests. 
Signed-off-by: Mark Brown Reviewed-by: Shuah Khan Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20220124171748.2195875-1-broonie@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/abi/syscall-abi.c | 1 - tools/testing/selftests/arm64/fp/sve-ptrace.c | 2 -- 2 files changed, 3 deletions(-) diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.c b/tools/testing/selftests/arm64/abi/syscall-abi.c index d8eeeafb50dcb..1e13b7523918e 100644 --- a/tools/testing/selftests/arm64/abi/syscall-abi.c +++ b/tools/testing/selftests/arm64/abi/syscall-abi.c @@ -18,7 +18,6 @@ #include "../../kselftest.h" -#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) #define NUM_VL ((SVE_VQ_MAX - SVE_VQ_MIN) + 1) extern void do_syscall(int sve_vl); diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index a3c1e67441f9e..4bd333768cc45 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -21,8 +21,6 @@ #include "../../kselftest.h" -#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) - /* and don't like each other, so: */ #ifndef NT_ARM_SVE #define NT_ARM_SVE 0x405 -- GitLab From 396520759bd3bf4a557e4edba9a63afc13cc5773 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 26 Jan 2022 17:44:21 +0000 Subject: [PATCH 0419/1586] kselftest/arm64: Remove local definitions of MTE prctls The GCR EL1 test unconditionally includes local definitions of the prctls it tests. Since not only will the kselftest build infrastructure ensure that the in tree uapi headers are available but the toolchain being used to build kselftest may ensure that system uapi headers with MTE support are available this causes the compiler to warn about duplicate definitions. Remove these duplicate definitions. 
Signed-off-by: Mark Brown Acked-by: Catalin Marinas Acked-by: Shuah Khan Link: https://lore.kernel.org/r/20220126174421.1712795-1-broonie@kernel.org Signed-off-by: Will Deacon --- .../selftests/arm64/mte/check_gcr_el1_cswitch.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c b/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c index a876db1f096ab..325bca0de0f6e 100644 --- a/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c +++ b/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c @@ -19,17 +19,6 @@ #include "kselftest.h" #include "mte_common_util.h" -#define PR_SET_TAGGED_ADDR_CTRL 55 -#define PR_GET_TAGGED_ADDR_CTRL 56 -# define PR_TAGGED_ADDR_ENABLE (1UL << 0) -# define PR_MTE_TCF_SHIFT 1 -# define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT) -# define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT) -# define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT) -# define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT) -# define PR_MTE_TAG_SHIFT 3 -# define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) - #include "mte_def.h" #define NUM_ITERATIONS 1024 -- GitLab From 4c022f57ad954e1ad6f838bd3b7d54e459745eeb Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 27 Jan 2022 19:03:23 +0000 Subject: [PATCH 0420/1586] arm64/mte: Clarify mode reported by PR_GET_TAGGED_ADDR_CTRL With the current wording readers might infer that PR_GET_TAGGED_ADDR_CTRL will report the mode currently active in the thread however this is not the actual behaviour, instead all modes currently selected by the process will be reported with the mode used depending on the combination of the requested modes and the default set for the current CPU. This has been the case since 433c38f40f6a81 ("arm64: mte: change ASYNC and SYNC TCF settings into bitfields"), before that we did not allow more than one mode to be requested simultaneously. Update the documentation to more clearly reflect current behaviour. 
Signed-off-by: Mark Brown Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20220127190324.660405-1-broonie@kernel.org Signed-off-by: Will Deacon --- Documentation/arm64/memory-tagging-extension.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Documentation/arm64/memory-tagging-extension.rst b/Documentation/arm64/memory-tagging-extension.rst index 7b99c8f428eb6..a0ab2f65b235c 100644 --- a/Documentation/arm64/memory-tagging-extension.rst +++ b/Documentation/arm64/memory-tagging-extension.rst @@ -91,8 +91,9 @@ mode is specified, the program will run in that mode. If multiple modes are specified, the mode is selected as described in the "Per-CPU preferred tag checking modes" section below. -The current tag check fault mode can be read using the -``prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0)`` system call. +The current tag check fault configuration can be read using the +``prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0)`` system call. If +multiple modes were requested then all will be reported. Tag checking can also be disabled for a user thread by setting the ``PSTATE.TCO`` bit with ``MSR TCO, #1``. -- GitLab From 65603435599f6425eadf51201956c88a03606ca7 Mon Sep 17 00:00:00 2001 From: Austin Kim Date: Wed, 24 Nov 2021 21:44:18 +0000 Subject: [PATCH 0421/1586] ima: Fix trivial typos in the comments There are a few minor typos in the comments. Fix these. 
Signed-off-by: Austin Kim Signed-off-by: Mimi Zohar --- security/integrity/ima/ima_api.c | 2 +- security/integrity/ima/ima_main.c | 2 +- security/integrity/ima/ima_policy.c | 2 +- security/integrity/ima/ima_template_lib.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c index a64fb0130b015..5b220a2fe573d 100644 --- a/security/integrity/ima/ima_api.c +++ b/security/integrity/ima/ima_api.c @@ -238,7 +238,7 @@ int ima_collect_measurement(struct integrity_iint_cache *iint, goto out; /* - * Dectecting file change is based on i_version. On filesystems + * Detecting file change is based on i_version. On filesystems * which do not support i_version, support is limited to an initial * measurement/appraisal/audit. */ diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 8c6e4514d4944..8ed6da428328b 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -764,7 +764,7 @@ int ima_post_read_file(struct file *file, void *buf, loff_t size, * call to ima_post_load_data(). * * Callers of this LSM hook can not measure, appraise, or audit the - * data provided by userspace. Enforce policy rules requring a file + * data provided by userspace. Enforce policy rules requiring a file * signature (eg. kexec'ed kernel image). * * For permission return 0, otherwise return -EACCES. diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index 2a1f6418b10a6..90f528558adc2 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -429,7 +429,7 @@ static int ima_lsm_update_rule(struct ima_rule_entry *entry) /* * ima_lsm_copy_rule() shallow copied all references, except for the * LSM references, from entry to nentry so we only want to free the LSM - * references and the entry itself. All other memory refrences will now + * references and the entry itself. 
All other memory references will now * be owned by nentry. */ ima_lsm_free_rule(entry); diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c index ca017cae73eb3..5a5d462ab36db 100644 --- a/security/integrity/ima/ima_template_lib.c +++ b/security/integrity/ima/ima_template_lib.c @@ -272,7 +272,7 @@ static int ima_eventdigest_init_common(const u8 *digest, u32 digestsize, * digest formats: * - DATA_FMT_DIGEST: digest * - DATA_FMT_DIGEST_WITH_ALGO: [] + ':' + '\0' + digest, - * where is provided if the hash algoritm is not + * where is provided if the hash algorithm is not * SHA1 or MD5 */ u8 buffer[CRYPTO_MAX_ALG_NAME + 2 + IMA_MAX_DIGEST_SIZE] = { 0 }; -- GitLab From 18848c7191320ae1e5f0afdda7fb99f25daadc75 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Mon, 17 Jan 2022 09:03:29 -0500 Subject: [PATCH 0422/1586] MAINTAINERS: add missing "security/integrity" directory Update the IMA and EVM records to include the "security/integrity/" directory. 
Reviewed-by: Petr Vorel Signed-off-by: Mimi Zohar --- MAINTAINERS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index fca970a46e77a..fdf0420ba477e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7273,7 +7273,9 @@ Extended Verification Module (EVM) M: Mimi Zohar L: linux-integrity@vger.kernel.org S: Supported +T: git git://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity.git F: security/integrity/evm/ +F: security/integrity/ EXTENSIBLE FIRMWARE INTERFACE (EFI) M: Ard Biesheuvel @@ -9521,6 +9523,7 @@ L: linux-integrity@vger.kernel.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity.git F: security/integrity/ima/ +F: security/integrity/ INTEL 810/815 FRAMEBUFFER DRIVER M: Antonino Daplas -- GitLab From e4e071baea41e43aebd7f17ed1ffceeca6aa9868 Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Tue, 1 Feb 2022 15:37:11 -0500 Subject: [PATCH 0423/1586] ima: Return error code obtained from securityfs functions If an error occurs when creating a securityfs file, return the exact error code to the caller. 
Signed-off-by: Stefan Berger Signed-off-by: Mimi Zohar --- security/integrity/ima/ima_fs.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c index 3ad8f7734208b..cd1683dad3bf0 100644 --- a/security/integrity/ima/ima_fs.c +++ b/security/integrity/ima/ima_fs.c @@ -452,47 +452,61 @@ static const struct file_operations ima_measure_policy_ops = { int __init ima_fs_init(void) { + int ret; + ima_dir = securityfs_create_dir("ima", integrity_dir); if (IS_ERR(ima_dir)) - return -1; + return PTR_ERR(ima_dir); ima_symlink = securityfs_create_symlink("ima", NULL, "integrity/ima", NULL); - if (IS_ERR(ima_symlink)) + if (IS_ERR(ima_symlink)) { + ret = PTR_ERR(ima_symlink); goto out; + } binary_runtime_measurements = securityfs_create_file("binary_runtime_measurements", S_IRUSR | S_IRGRP, ima_dir, NULL, &ima_measurements_ops); - if (IS_ERR(binary_runtime_measurements)) + if (IS_ERR(binary_runtime_measurements)) { + ret = PTR_ERR(binary_runtime_measurements); goto out; + } ascii_runtime_measurements = securityfs_create_file("ascii_runtime_measurements", S_IRUSR | S_IRGRP, ima_dir, NULL, &ima_ascii_measurements_ops); - if (IS_ERR(ascii_runtime_measurements)) + if (IS_ERR(ascii_runtime_measurements)) { + ret = PTR_ERR(ascii_runtime_measurements); goto out; + } runtime_measurements_count = securityfs_create_file("runtime_measurements_count", S_IRUSR | S_IRGRP, ima_dir, NULL, &ima_measurements_count_ops); - if (IS_ERR(runtime_measurements_count)) + if (IS_ERR(runtime_measurements_count)) { + ret = PTR_ERR(runtime_measurements_count); goto out; + } violations = securityfs_create_file("violations", S_IRUSR | S_IRGRP, ima_dir, NULL, &ima_htable_violations_ops); - if (IS_ERR(violations)) + if (IS_ERR(violations)) { + ret = PTR_ERR(violations); goto out; + } ima_policy = securityfs_create_file("policy", POLICY_FILE_FLAGS, ima_dir, NULL, &ima_measure_policy_ops); - if 
(IS_ERR(ima_policy)) + if (IS_ERR(ima_policy)) { + ret = PTR_ERR(ima_policy); goto out; + } return 0; out: @@ -503,5 +517,6 @@ out: securityfs_remove(binary_runtime_measurements); securityfs_remove(ima_symlink); securityfs_remove(ima_dir); - return -1; + + return ret; } -- GitLab From aae6ccbd826d26730a6fd9bc01884f0a0a9cbb25 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Tue, 28 Dec 2021 09:53:14 -0500 Subject: [PATCH 0424/1586] ima: rename IMA_ACTION_FLAGS to IMA_NONACTION_FLAGS Simple policy rule options, such as fowner, uid, or euid, can be checked immediately, while other policy rule options, such as requiring a file signature, need to be deferred. The 'flags' field in the integrity_iint_cache struct contains the policy 'action', 'subaction', and non action/subaction. action: measure/measured, appraise/appraised, (collect)/collected, audit/audited subaction: appraise status for each hook (e.g. file, mmap, bprm, read, creds) non action/subaction: deferred policy rule options and state Rename the IMA_ACTION_FLAGS to IMA_NONACTION_FLAGS. 
Reviewed-by: Stefan Berger Signed-off-by: Mimi Zohar --- security/integrity/ima/ima_main.c | 2 +- security/integrity/ima/ima_policy.c | 2 +- security/integrity/integrity.h | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 8ed6da428328b..7c80dfe2c7a54 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -263,7 +263,7 @@ static int process_measurement(struct file *file, const struct cred *cred, /* reset appraisal flags if ima_inode_post_setattr was called */ iint->flags &= ~(IMA_APPRAISE | IMA_APPRAISED | IMA_APPRAISE_SUBMASK | IMA_APPRAISED_SUBMASK | - IMA_ACTION_FLAGS); + IMA_NONACTION_FLAGS); /* * Re-evaulate the file if either the xattr has changed or the diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index 90f528558adc2..a0f3775cbd82b 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -712,7 +712,7 @@ int ima_match_policy(struct user_namespace *mnt_userns, struct inode *inode, func, mask, func_data)) continue; - action |= entry->flags & IMA_ACTION_FLAGS; + action |= entry->flags & IMA_NONACTION_FLAGS; action |= entry->action & IMA_DO_MASK; if (entry->action & IMA_APPRAISE) { diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h index 547425c20e117..d045dccd415af 100644 --- a/security/integrity/integrity.h +++ b/security/integrity/integrity.h @@ -30,8 +30,8 @@ #define IMA_HASH 0x00000100 #define IMA_HASHED 0x00000200 -/* iint cache flags */ -#define IMA_ACTION_FLAGS 0xff000000 +/* iint policy rule cache flags */ +#define IMA_NONACTION_FLAGS 0xff000000 #define IMA_DIGSIG_REQUIRED 0x01000000 #define IMA_PERMIT_DIRECTIO 0x02000000 #define IMA_NEW_FILE 0x04000000 -- GitLab From 8c54135e2e6da677291012813a26a5f1b2c8a90a Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Mon, 24 Jan 2022 14:26:23 -0500 Subject: [PATCH 0425/1586] ima: 
define ima_max_digest_data struct without a flexible array variable To support larger hash digests in the 'iint' cache, instead of defining the 'digest' field as the maximum digest size, the 'digest' field was defined as a flexible array variable. The "ima_digest_data" struct was wrapped inside a local structure with the maximum digest size. But before adding the record to the iint cache, memory for the exact digest size was dynamically allocated. The original reason for defining the 'digest' field as a flexible array variable is still valid for the 'iint' cache use case. Instead of wrapping the 'ima_digest_data' struct in a local structure define 'ima_max_digest_data' struct. Reviewed-by: Stefan Berger Signed-off-by: Mimi Zohar --- security/integrity/ima/ima_api.c | 10 ++++------ security/integrity/ima/ima_init.c | 5 +---- security/integrity/ima/ima_main.c | 5 +---- security/integrity/ima/ima_template_lib.c | 5 +---- security/integrity/integrity.h | 10 ++++++++++ 5 files changed, 17 insertions(+), 18 deletions(-) diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c index 5b220a2fe573d..c6805af462118 100644 --- a/security/integrity/ima/ima_api.c +++ b/security/integrity/ima/ima_api.c @@ -217,14 +217,11 @@ int ima_collect_measurement(struct integrity_iint_cache *iint, const char *audit_cause = "failed"; struct inode *inode = file_inode(file); const char *filename = file->f_path.dentry->d_name.name; + struct ima_max_digest_data hash; int result = 0; int length; void *tmpbuf; u64 i_version; - struct { - struct ima_digest_data hdr; - char digest[IMA_MAX_DIGEST_SIZE]; - } hash; /* * Always collect the modsig, because IMA might have already collected @@ -239,8 +236,9 @@ int ima_collect_measurement(struct integrity_iint_cache *iint, /* * Detecting file change is based on i_version. On filesystems - * which do not support i_version, support is limited to an initial - * measurement/appraisal/audit. 
+ * which do not support i_version, support was originally limited + * to an initial measurement/appraisal/audit, but was modified to + * assume the file changed. */ i_version = inode_query_iversion(inode); hash.hdr.algo = algo; diff --git a/security/integrity/ima/ima_init.c b/security/integrity/ima/ima_init.c index b26fa67476b41..63979aefc95f7 100644 --- a/security/integrity/ima/ima_init.c +++ b/security/integrity/ima/ima_init.c @@ -47,12 +47,9 @@ static int __init ima_add_boot_aggregate(void) struct integrity_iint_cache tmp_iint, *iint = &tmp_iint; struct ima_event_data event_data = { .iint = iint, .filename = boot_aggregate_name }; + struct ima_max_digest_data hash; int result = -ENOMEM; int violation = 0; - struct { - struct ima_digest_data hdr; - char digest[TPM_MAX_DIGEST_SIZE]; - } hash; memset(iint, 0, sizeof(*iint)); memset(&hash, 0, sizeof(hash)); diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 7c80dfe2c7a54..c6412dec3810b 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -874,10 +874,7 @@ int process_buffer_measurement(struct user_namespace *mnt_userns, .buf = buf, .buf_len = size}; struct ima_template_desc *template; - struct { - struct ima_digest_data hdr; - char digest[IMA_MAX_DIGEST_SIZE]; - } hash = {}; + struct ima_max_digest_data hash; char digest_hash[IMA_MAX_DIGEST_SIZE]; int digest_hash_len = hash_digest_size[ima_hash_algo]; int violation = 0; diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c index 5a5d462ab36db..7155d17a3b75f 100644 --- a/security/integrity/ima/ima_template_lib.c +++ b/security/integrity/ima/ima_template_lib.c @@ -307,10 +307,7 @@ static int ima_eventdigest_init_common(const u8 *digest, u32 digestsize, int ima_eventdigest_init(struct ima_event_data *event_data, struct ima_field_data *field_data) { - struct { - struct ima_digest_data hdr; - char digest[IMA_MAX_DIGEST_SIZE]; - } hash; + struct 
ima_max_digest_data hash; u8 *cur_digest = NULL; u32 cur_digestsize = 0; struct inode *inode; diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h index d045dccd415af..daf49894fd7d0 100644 --- a/security/integrity/integrity.h +++ b/security/integrity/integrity.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -110,6 +111,15 @@ struct ima_digest_data { u8 digest[]; } __packed; +/* + * Instead of wrapping the ima_digest_data struct inside a local structure + * with the maximum hash size, define ima_max_digest_data struct. + */ +struct ima_max_digest_data { + struct ima_digest_data hdr; + u8 digest[HASH_MAX_DIGESTSIZE]; +} __packed; + /* * signature format v2 - for using with asymmetric keys */ -- GitLab From d53f8f8dbe97e4ed7d52e57581d1a8f6e62a7643 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Wed, 9 Feb 2022 15:22:35 +0000 Subject: [PATCH 0426/1586] kselftest/arm64: mte: user_mem: introduce tag_offset and tag_len These can be used to place an MTE tag at an address that is not at a page size boundary. The kernel prior to 295cf156231c ("arm64: Avoid premature usercopy failure"), would infinite loop if an MTE tag was placed not at a PAGE_SIZE boundary. This is because the kernel checked if the pages were readable by checking the first byte of each page, but would then fault in the middle of the page due to the MTE tag. 
Signed-off-by: Joey Gouly Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Brown Cc: Shuah Khan Reviewed-by: Mark Brown Tested-by: Mark Brown Reviewed-by: Shuah Khan Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220209152240.52788-2-joey.gouly@arm.com Signed-off-by: Will Deacon --- .../selftests/arm64/mte/check_user_mem.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c index 1de7a0abd0ae3..5a5a7e1f57890 100644 --- a/tools/testing/selftests/arm64/mte/check_user_mem.c +++ b/tools/testing/selftests/arm64/mte/check_user_mem.c @@ -19,7 +19,8 @@ static size_t page_sz; -static int check_usermem_access_fault(int mem_type, int mode, int mapping) +static int check_usermem_access_fault(int mem_type, int mode, int mapping, + int tag_offset, int tag_len) { int fd, i, err; char val = 'A'; @@ -54,10 +55,12 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping) if (i < len) goto usermem_acc_err; - /* Tag the next half of memory with different value */ - ptr_next = (void *)((unsigned long)ptr + page_sz); + if (!tag_len) + tag_len = len - tag_offset; + /* Tag a part of memory with different value */ + ptr_next = (void *)((unsigned long)ptr + tag_offset); ptr_next = mte_insert_new_tag(ptr_next); - mte_set_tag_address_range(ptr_next, page_sz); + mte_set_tag_address_range(ptr_next, tag_len); lseek(fd, 0, 0); /* Copy from file into buffer with invalid tag */ @@ -100,14 +103,14 @@ int main(int argc, char *argv[]) /* Set test plan */ ksft_set_plan(4); - evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), + evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, page_sz, 0), "Check memory access from kernel in sync mode, private mapping and mmap memory\n"); - evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED), + 
evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, page_sz, 0), "Check memory access from kernel in sync mode, shared mapping and mmap memory\n"); - evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE), + evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, page_sz, 0), "Check memory access from kernel in async mode, private mapping and mmap memory\n"); - evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED), + evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, page_sz, 0), "Check memory access from kernel in async mode, shared mapping and mmap memory\n"); mte_restore_setup(); -- GitLab From ff0b9aba30aeca68de09b784093f4482108586a9 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Wed, 9 Feb 2022 15:22:36 +0000 Subject: [PATCH 0427/1586] kselftest/arm64: mte: user_mem: rework error handling Future commits will have multiple iterations of tests in this function, so make the error handling assume it will pass and then bail out if there is an error. 
Signed-off-by: Joey Gouly Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Brown Cc: Shuah Khan Reviewed-by: Mark Brown Tested-by: Mark Brown Reviewed-by: Shuah Khan Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220209152240.52788-3-joey.gouly@arm.com Signed-off-by: Will Deacon --- .../selftests/arm64/mte/check_user_mem.c | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c index 5a5a7e1f57890..2afcc9fb9ae82 100644 --- a/tools/testing/selftests/arm64/mte/check_user_mem.c +++ b/tools/testing/selftests/arm64/mte/check_user_mem.c @@ -27,7 +27,7 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping, size_t len, read_len; void *ptr, *ptr_next; - err = KSFT_FAIL; + err = KSFT_PASS; len = 2 * page_sz; mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); fd = create_temp_file(); @@ -71,14 +71,22 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping, * mode without fault but may not fail in async mode as per the * implemented MTE userspace support in Arm64 kernel. 
*/ - if (mode == MTE_SYNC_ERR && - !cur_mte_cxt.fault_valid && read_len < len) { - err = KSFT_PASS; - } else if (mode == MTE_ASYNC_ERR && - !cur_mte_cxt.fault_valid && read_len == len) { - err = KSFT_PASS; + if (cur_mte_cxt.fault_valid) + goto usermem_acc_err; + + if (mode == MTE_SYNC_ERR && read_len < len) { + /* test passed */ + } else if (mode == MTE_ASYNC_ERR && read_len == len) { + /* test passed */ + } else { + goto usermem_acc_err; } + + goto exit; + usermem_acc_err: + err = KSFT_FAIL; +exit: mte_free_memory((void *)ptr, len, mem_type, true); close(fd); return err; -- GitLab From 682b064bae871deb213ed2e97fe4a5d4a5132e37 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Wed, 9 Feb 2022 15:22:37 +0000 Subject: [PATCH 0428/1586] kselftest/arm64: mte: user_mem: check different offsets and sizes To check there are no assumptions in the kernel about buffer sizes or alignments of user space pointers, expand the test to cover different sizes and offsets. Signed-off-by: Joey Gouly Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Brown Cc: Shuah Khan Reviewed-by: Mark Brown Tested-by: Mark Brown Reviewed-by: Shuah Khan Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220209152240.52788-4-joey.gouly@arm.com Signed-off-by: Will Deacon --- .../selftests/arm64/mte/check_user_mem.c | 45 +++++++++++-------- 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c index 2afcc9fb9ae82..89c861ee68fa1 100644 --- a/tools/testing/selftests/arm64/mte/check_user_mem.c +++ b/tools/testing/selftests/arm64/mte/check_user_mem.c @@ -26,6 +26,8 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping, char val = 'A'; size_t len, read_len; void *ptr, *ptr_next; + int fileoff, ptroff, size; + int sizes[] = {1, 2, 3, 8, 16, 32, 4096, page_sz}; err = KSFT_PASS; len = 2 * page_sz; @@ -62,24 +64,31 @@ static int check_usermem_access_fault(int mem_type, int 
mode, int mapping, ptr_next = mte_insert_new_tag(ptr_next); mte_set_tag_address_range(ptr_next, tag_len); - lseek(fd, 0, 0); - /* Copy from file into buffer with invalid tag */ - read_len = read(fd, ptr, len); - mte_wait_after_trig(); - /* - * Accessing user memory in kernel with invalid tag should fail in sync - * mode without fault but may not fail in async mode as per the - * implemented MTE userspace support in Arm64 kernel. - */ - if (cur_mte_cxt.fault_valid) - goto usermem_acc_err; - - if (mode == MTE_SYNC_ERR && read_len < len) { - /* test passed */ - } else if (mode == MTE_ASYNC_ERR && read_len == len) { - /* test passed */ - } else { - goto usermem_acc_err; + for (fileoff = 0; fileoff < 16; fileoff++) { + for (ptroff = 0; ptroff < 16; ptroff++) { + for (i = 0; i < ARRAY_SIZE(sizes); i++) { + size = sizes[i]; + lseek(fd, 0, 0); + /* Copy from file into buffer with invalid tag */ + read_len = read(fd, ptr + ptroff, size); + mte_wait_after_trig(); + /* + * Accessing user memory in kernel with invalid tag should fail in sync + * mode without fault but may not fail in async mode as per the + * implemented MTE userspace support in Arm64 kernel. + */ + if (cur_mte_cxt.fault_valid) { + goto usermem_acc_err; + } + if (mode == MTE_SYNC_ERR && read_len < len) { + /* test passed */ + } else if (mode == MTE_ASYNC_ERR && read_len == size) { + /* test passed */ + } else { + goto usermem_acc_err; + } + } + } } goto exit; -- GitLab From b9fc700176f1cc3d9aef7dd51423150cc1567a9a Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Wed, 9 Feb 2022 15:22:38 +0000 Subject: [PATCH 0429/1586] kselftest/arm64: mte: user_mem: add test type enum The test is currently hardcoded to use the `read` syscall, this commit adds a test_type enum to support expanding the test coverage to other syscalls. 
Signed-off-by: Joey Gouly Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Brown Cc: Shuah Khan Reviewed-by: Mark Brown Tested-by: Mark Brown Reviewed-by: Shuah Khan Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220209152240.52788-5-joey.gouly@arm.com Signed-off-by: Will Deacon --- .../selftests/arm64/mte/check_user_mem.c | 38 +++++++++++++------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c index 89c861ee68fa1..58b1b272ca80f 100644 --- a/tools/testing/selftests/arm64/mte/check_user_mem.c +++ b/tools/testing/selftests/arm64/mte/check_user_mem.c @@ -19,12 +19,18 @@ static size_t page_sz; +enum test_type { + READ_TEST, + LAST_TEST, +}; + static int check_usermem_access_fault(int mem_type, int mode, int mapping, - int tag_offset, int tag_len) + int tag_offset, int tag_len, + enum test_type test_type) { int fd, i, err; char val = 'A'; - size_t len, read_len; + ssize_t len, syscall_len; void *ptr, *ptr_next; int fileoff, ptroff, size; int sizes[] = {1, 2, 3, 8, 16, 32, 4096, page_sz}; @@ -46,9 +52,9 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping, } mte_initialize_current_context(mode, (uintptr_t)ptr, len); /* Copy from file into buffer with valid tag */ - read_len = read(fd, ptr, len); + syscall_len = read(fd, ptr, len); mte_wait_after_trig(); - if (cur_mte_cxt.fault_valid || read_len < len) + if (cur_mte_cxt.fault_valid || syscall_len < len) goto usermem_acc_err; /* Verify same pattern is read */ for (i = 0; i < len; i++) @@ -69,8 +75,16 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping, for (i = 0; i < ARRAY_SIZE(sizes); i++) { size = sizes[i]; lseek(fd, 0, 0); - /* Copy from file into buffer with invalid tag */ - read_len = read(fd, ptr + ptroff, size); + + /* perform file operation on buffer with invalid tag */ + switch (test_type) { + case READ_TEST: + syscall_len = read(fd, 
ptr + ptroff, size); + break; + case LAST_TEST: + goto usermem_acc_err; + } + mte_wait_after_trig(); /* * Accessing user memory in kernel with invalid tag should fail in sync @@ -80,9 +94,9 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping, if (cur_mte_cxt.fault_valid) { goto usermem_acc_err; } - if (mode == MTE_SYNC_ERR && read_len < len) { + if (mode == MTE_SYNC_ERR && syscall_len < len) { /* test passed */ - } else if (mode == MTE_ASYNC_ERR && read_len == size) { + } else if (mode == MTE_ASYNC_ERR && syscall_len == size) { /* test passed */ } else { goto usermem_acc_err; @@ -120,14 +134,14 @@ int main(int argc, char *argv[]) /* Set test plan */ ksft_set_plan(4); - evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, page_sz, 0), + evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, page_sz, 0, READ_TEST), "Check memory access from kernel in sync mode, private mapping and mmap memory\n"); - evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, page_sz, 0), + evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, page_sz, 0, READ_TEST), "Check memory access from kernel in sync mode, shared mapping and mmap memory\n"); - evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, page_sz, 0), + evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, page_sz, 0, READ_TEST), "Check memory access from kernel in async mode, private mapping and mmap memory\n"); - evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, page_sz, 0), + evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, page_sz, 0, READ_TEST), "Check memory access from kernel in async mode, shared mapping and mmap memory\n"); mte_restore_setup(); -- GitLab From e8d3974f34fa8ac38915c307677657b4d6acc619 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Wed, 9 Feb 2022 15:22:39 +0000 Subject: [PATCH 
0430/1586] kselftest/arm64: mte: user_mem: add more test types To expand the test coverage for MTE tags in userspace memory, also perform the test with `write`, `readv` and `writev` syscalls. Signed-off-by: Joey Gouly Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Brown Cc: Shuah Khan Reviewed-by: Mark Brown Tested-by: Mark Brown Reviewed-by: Shuah Khan Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220209152240.52788-6-joey.gouly@arm.com Signed-off-by: Will Deacon --- .../selftests/arm64/mte/check_user_mem.c | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c index 58b1b272ca80f..bb4974c437f8a 100644 --- a/tools/testing/selftests/arm64/mte/check_user_mem.c +++ b/tools/testing/selftests/arm64/mte/check_user_mem.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include "kselftest.h" @@ -21,6 +22,9 @@ static size_t page_sz; enum test_type { READ_TEST, + WRITE_TEST, + READV_TEST, + WRITEV_TEST, LAST_TEST, }; @@ -81,6 +85,23 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping, case READ_TEST: syscall_len = read(fd, ptr + ptroff, size); break; + case WRITE_TEST: + syscall_len = write(fd, ptr + ptroff, size); + break; + case READV_TEST: { + struct iovec iov[1]; + iov[0].iov_base = ptr + ptroff; + iov[0].iov_len = size; + syscall_len = readv(fd, iov, 1); + break; + } + case WRITEV_TEST: { + struct iovec iov[1]; + iov[0].iov_base = ptr + ptroff; + iov[0].iov_len = size; + syscall_len = writev(fd, iov, 1); + break; + } case LAST_TEST: goto usermem_acc_err; } -- GitLab From 0a775ccb81207413d07214ac6eaed75d0e4376b1 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Wed, 9 Feb 2022 15:22:40 +0000 Subject: [PATCH 0431/1586] kselftest/arm64: mte: user_mem: test a wider range of values Instead of hard coding a small number of tests, generate a wider range of tests to try to catch any corner cases that could
show up. These new tests test different MTE tag lengths and offsets, which previously would have caused infinite loops in the kernel. This was fixed by 295cf156231c ("arm64: Avoid premature usercopy failure"), so these are regression tests for that corner case. Signed-off-by: Joey Gouly Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Brown Cc: Shuah Khan Reviewed-by: Mark Brown Tested-by: Mark Brown Reviewed-by: Shuah Khan Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220209152240.52788-7-joey.gouly@arm.com Signed-off-by: Will Deacon --- .../selftests/arm64/mte/check_user_mem.c | 94 ++++++++++++++++--- 1 file changed, 83 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c index bb4974c437f8a..f4ae5f87a3b77 100644 --- a/tools/testing/selftests/arm64/mte/check_user_mem.c +++ b/tools/testing/selftests/arm64/mte/check_user_mem.c @@ -3,6 +3,7 @@ #define _GNU_SOURCE +#include #include #include #include @@ -20,6 +21,8 @@ static size_t page_sz; +#define TEST_NAME_MAX 100 + enum test_type { READ_TEST, WRITE_TEST, @@ -136,9 +139,67 @@ exit: return err; } +void format_test_name(char* name, int name_len, int type, int sync, int map, int len, int offset) { + const char* test_type; + const char* mte_type; + const char* map_type; + + switch (type) { + case READ_TEST: + test_type = "read"; + break; + case WRITE_TEST: + test_type = "write"; + break; + case READV_TEST: + test_type = "readv"; + break; + case WRITEV_TEST: + test_type = "writev"; + break; + default: + assert(0); + break; + } + + switch (sync) { + case MTE_SYNC_ERR: + mte_type = "MTE_SYNC_ERR"; + break; + case MTE_ASYNC_ERR: + mte_type = "MTE_ASYNC_ERR"; + break; + default: + assert(0); + break; + } + + switch (map) { + case MAP_SHARED: + map_type = "MAP_SHARED"; + break; + case MAP_PRIVATE: + map_type = "MAP_PRIVATE"; + break; + default: + assert(0); + break; + } + + snprintf(name, name_len, + "test type: %s,
%s, %s, tag len: %d, tag offset: %d\n", + test_type, mte_type, map_type, len, offset); +} + int main(int argc, char *argv[]) { int err; + int t, s, m, l, o; + int mte_sync[] = {MTE_SYNC_ERR, MTE_ASYNC_ERR}; + int maps[] = {MAP_SHARED, MAP_PRIVATE}; + int tag_lens[] = {0, MT_GRANULE_SIZE}; + int tag_offsets[] = {page_sz, MT_GRANULE_SIZE}; + char test_name[TEST_NAME_MAX]; page_sz = getpagesize(); if (!page_sz) { @@ -153,17 +214,28 @@ int main(int argc, char *argv[]) mte_register_signal(SIGSEGV, mte_default_handler); /* Set test plan */ - ksft_set_plan(4); - - evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, page_sz, 0, READ_TEST), - "Check memory access from kernel in sync mode, private mapping and mmap memory\n"); - evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, page_sz, 0, READ_TEST), - "Check memory access from kernel in sync mode, shared mapping and mmap memory\n"); - - evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, page_sz, 0, READ_TEST), - "Check memory access from kernel in async mode, private mapping and mmap memory\n"); - evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, page_sz, 0, READ_TEST), - "Check memory access from kernel in async mode, shared mapping and mmap memory\n"); + ksft_set_plan(64); + + for (t = 0; t < LAST_TEST; t++) { + for (s = 0; s < ARRAY_SIZE(mte_sync); s++) { + for (m = 0; m < ARRAY_SIZE(maps); m++) { + for (l = 0; l < ARRAY_SIZE(tag_lens); l++) { + for (o = 0; o < ARRAY_SIZE(tag_offsets); o++) { + int sync = mte_sync[s]; + int map = maps[m]; + int offset = tag_offsets[o]; + int tag_len = tag_lens[l]; + int res = check_usermem_access_fault(USE_MMAP, sync, + map, offset, + tag_len, t); + format_test_name(test_name, TEST_NAME_MAX, + t, sync, map, tag_len, offset); + evaluate_test(res, test_name); + } + } + } + } + } mte_restore_setup(); ksft_print_cnts(); -- GitLab From 16860a209cf1ad20a3b454b1c56d64c9ea9532ac Mon Sep 
17 00:00:00 2001 From: Mark Rutland Date: Fri, 4 Feb 2022 10:44:39 +0000 Subject: [PATCH 0432/1586] arm64: atomics: remove redundant static branch Due to a historical oversight, we emit a redundant static branch for each atomic/atomic64 operation when CONFIG_ARM64_LSE_ATOMICS is selected. We can safely remove this, making the kernel Image reasonably smaller. When CONFIG_ARM64_LSE_ATOMICS is selected, every LSE atomic operation has two preceding static branches with the same target, e.g. b f7c b f7c mov w0, #0x1 // #1 ldadd w0, w0, [x19] This is because the __lse_ll_sc_body() wrapper uses system_uses_lse_atomics(), which checks both `arm64_const_caps_ready` and `cpu_hwcap_keys[ARM64_HAS_LSE_ATOMICS]`, each of which emits a static branch. This has been the case since commit: addfc38672c73efd ("arm64: atomics: avoid out-of-line ll/sc atomics") However, there was never a need to check `arm64_const_caps_ready`, which was itself introduced in commit: 63a1e1c95e60e798 ("arm64/cpufeature: don't use mutex in bringup path") ... so that cpus_have_const_cap() could fall back to checking the `cpu_hwcaps` bitmap prior to the static keys for individual caps becoming enabled. As system_uses_lse_atomics() doesn't check `cpu_hwcaps`, and doesn't need to as we can safely use the LL/SC atomics prior to enabling the `ARM64_HAS_LSE_ATOMICS` static key, it doesn't need to check `arm64_const_caps_ready`. This patch removes the `arm64_const_caps_ready` check from system_uses_lse_atomics(). As the arch_atomic_* routines are meant to be safely usable in noinstr code, I've also marked system_uses_lse_atomics() as __always_inline. This results in one fewer static branch per atomic operation, with the prior example becoming: b f78 mov w0, #0x1 // #1 ldadd w0, w0, [x19] Each static branch consists of the branch itself and an associated __jump_table entry. 
Removing these has a reasonable impact on the Image size, with a GCC 11.1.0 defconfig v5.17-rc2 Image being reduced by 128KiB: | [mark@lakrids:~/src/linux]% ls -al Image* | -rw-r--r-- 1 mark mark 34619904 Feb 3 18:24 Image.baseline | -rw-r--r-- 1 mark mark 34488832 Feb 3 18:33 Image.onebranch Signed-off-by: Mark Rutland Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Suzuki Poulose Cc: Will Deacon Link: https://lore.kernel.org/r/20220204104439.270567-1-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/lse.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h index 5d10051c3e62e..29c85810ae690 100644 --- a/arch/arm64/include/asm/lse.h +++ b/arch/arm64/include/asm/lse.h @@ -17,12 +17,10 @@ #include extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS]; -extern struct static_key_false arm64_const_caps_ready; -static inline bool system_uses_lse_atomics(void) +static __always_inline bool system_uses_lse_atomics(void) { - return (static_branch_likely(&arm64_const_caps_ready)) && - static_branch_likely(&cpu_hwcap_keys[ARM64_HAS_LSE_ATOMICS]); + return static_branch_likely(&cpu_hwcap_keys[ARM64_HAS_LSE_ATOMICS]); } #define __lse_ll_sc_body(op, ...) \ -- GitLab From 5e50f5d4ff31e95599d695df1f0a4e7d2d6fef99 Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Sat, 12 Feb 2022 18:59:21 +0100 Subject: [PATCH 0433/1586] security: add sctp_assoc_established hook security_sctp_assoc_established() is added to replace security_inet_conn_established() called in sctp_sf_do_5_1E_ca(), so that asoc can be accessed in security subsystem and save the peer secid to asoc->peer_secid. 
Fixes: 72e89f50084c ("security: Add support for SCTP security hooks") Reported-by: Prashanth Prahlad Based-on-patch-by: Xin Long Reviewed-by: Xin Long Tested-by: Richard Haines Signed-off-by: Ondrej Mosnacek Signed-off-by: Paul Moore --- Documentation/security/SCTP.rst | 22 ++++++++++------------ include/linux/lsm_hook_defs.h | 2 ++ include/linux/lsm_hooks.h | 5 +++++ include/linux/security.h | 8 ++++++++ net/sctp/sm_statefuns.c | 8 +++++--- security/security.c | 7 +++++++ 6 files changed, 37 insertions(+), 15 deletions(-) diff --git a/Documentation/security/SCTP.rst b/Documentation/security/SCTP.rst index d5fd6ccc3dcbd..406cc68b88087 100644 --- a/Documentation/security/SCTP.rst +++ b/Documentation/security/SCTP.rst @@ -15,10 +15,7 @@ For security module support, three SCTP specific hooks have been implemented:: security_sctp_assoc_request() security_sctp_bind_connect() security_sctp_sk_clone() - -Also the following security hook has been utilised:: - - security_inet_conn_established() + security_sctp_assoc_established() The usage of these hooks are described below with the SELinux implementation described in the `SCTP SELinux Support`_ chapter. @@ -122,11 +119,12 @@ calls **sctp_peeloff**\(3). @newsk - pointer to new sock structure. -security_inet_conn_established() +security_sctp_assoc_established() ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Called when a COOKIE ACK is received:: +Called when a COOKIE ACK is received, and the peer secid will be +saved into ``@asoc->peer_secid`` for client:: - @sk - pointer to sock structure. + @asoc - pointer to sctp association structure. @skb - pointer to skbuff of the COOKIE ACK packet. 
@@ -134,7 +132,7 @@ Security Hooks used for Association Establishment ------------------------------------------------- The following diagram shows the use of ``security_sctp_bind_connect()``, -``security_sctp_assoc_request()``, ``security_inet_conn_established()`` when +``security_sctp_assoc_request()``, ``security_sctp_assoc_established()`` when establishing an association. :: @@ -172,7 +170,7 @@ establishing an association. <------------------------------------------- COOKIE ACK | | sctp_sf_do_5_1E_ca | - Call security_inet_conn_established() | + Call security_sctp_assoc_established() | to set the peer label. | | | | If SCTP_SOCKET_TCP or peeled off @@ -198,7 +196,7 @@ hooks with the SELinux specifics expanded below:: security_sctp_assoc_request() security_sctp_bind_connect() security_sctp_sk_clone() - security_inet_conn_established() + security_sctp_assoc_established() security_sctp_assoc_request() @@ -271,12 +269,12 @@ sockets sid and peer sid to that contained in the ``@asoc sid`` and @newsk - pointer to new sock structure. -security_inet_conn_established() +security_sctp_assoc_established() ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Called when a COOKIE ACK is received where it sets the connection's peer sid to that in ``@skb``:: - @sk - pointer to sock structure. + @asoc - pointer to sctp association structure. @skb - pointer to skbuff of the COOKIE ACK packet. 
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index a5a724c308d8d..45931d81ccc3f 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -332,6 +332,8 @@ LSM_HOOK(int, 0, sctp_bind_connect, struct sock *sk, int optname, struct sockaddr *address, int addrlen) LSM_HOOK(void, LSM_RET_VOID, sctp_sk_clone, struct sctp_association *asoc, struct sock *sk, struct sock *newsk) +LSM_HOOK(int, 0, sctp_assoc_established, struct sctp_association *asoc, + struct sk_buff *skb) #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_INFINIBAND diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 3bf5c658bc448..419b5febc3ca5 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1046,6 +1046,11 @@ * @asoc pointer to current sctp association structure. * @sk pointer to current sock structure. * @newsk pointer to new sock structure. + * @sctp_assoc_established: + * Passes the @asoc and @chunk->skb of the association COOKIE_ACK packet + * to the security module. + * @asoc pointer to sctp association structure. + * @skb pointer to skbuff of association packet. 
* * Security hooks for Infiniband * diff --git a/include/linux/security.h b/include/linux/security.h index 6d72772182c82..25b3ef71f495e 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1422,6 +1422,8 @@ int security_sctp_bind_connect(struct sock *sk, int optname, struct sockaddr *address, int addrlen); void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk, struct sock *newsk); +int security_sctp_assoc_established(struct sctp_association *asoc, + struct sk_buff *skb); #else /* CONFIG_SECURITY_NETWORK */ static inline int security_unix_stream_connect(struct sock *sock, @@ -1641,6 +1643,12 @@ static inline void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *newsk) { } + +static inline int security_sctp_assoc_established(struct sctp_association *asoc, + struct sk_buff *skb) +{ + return 0; +} #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_INFINIBAND diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index cc544a97c4afd..7f342bc127358 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -930,6 +930,11 @@ enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net, if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* Set peer label for connection. */ + if (security_sctp_assoc_established((struct sctp_association *)asoc, + chunk->skb)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* Verify that the chunk length for the COOKIE-ACK is OK. * If we don't do this, any bundled chunks may be junked. */ @@ -945,9 +950,6 @@ enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net, */ sctp_add_cmd_sf(commands, SCTP_CMD_INIT_COUNTER_RESET, SCTP_NULL()); - /* Set peer label for connection. 
*/ - security_inet_conn_established(ep->base.sk, chunk->skb); - /* RFC 2960 5.1 Normal Establishment of an Association * * E) Upon reception of the COOKIE ACK, endpoint "A" will move diff --git a/security/security.c b/security/security.c index e649c8691be27..9663ffcca4b01 100644 --- a/security/security.c +++ b/security/security.c @@ -2393,6 +2393,13 @@ void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk, } EXPORT_SYMBOL(security_sctp_sk_clone); +int security_sctp_assoc_established(struct sctp_association *asoc, + struct sk_buff *skb) +{ + return call_int_hook(sctp_assoc_established, 0, asoc, skb); +} +EXPORT_SYMBOL(security_sctp_assoc_established); + #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_INFINIBAND -- GitLab From 3eb8eaf2ca3e98d4f6e52bed6148ee8fe3069a3d Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Sat, 12 Feb 2022 18:59:22 +0100 Subject: [PATCH 0434/1586] security: implement sctp_assoc_established hook in selinux Do this by extracting the peer labeling per-association logic from selinux_sctp_assoc_request() into a new helper selinux_sctp_process_new_assoc() and use this helper in both selinux_sctp_assoc_request() and selinux_sctp_assoc_established(). This ensures that the peer labeling behavior as documented in Documentation/security/SCTP.rst is applied both on the client and server side: """ An SCTP socket will only have one peer label assigned to it. This will be assigned during the establishment of the first association. Any further associations on this socket will have their packet peer label compared to the sockets peer label, and only if they are different will the ``association`` permission be validated. This is validated by checking the socket peer sid against the received packets peer sid to determine whether the association should be allowed or denied. 
""" At the same time, it also ensures that the peer label of the association is set to the correct value, such that if it is peeled off into a new socket, the socket's peer label will then be set to the association's peer label, same as it already works on the server side. While selinux_inet_conn_established() (which we are replacing by selinux_sctp_assoc_established() for SCTP) only deals with assigning a peer label to the connection (socket), in case of SCTP we need to also copy the (local) socket label to the association, so that selinux_sctp_sk_clone() can then pick it up for the new socket in case of SCTP peeloff. Careful readers will notice that the selinux_sctp_process_new_assoc() helper also includes the "IPv4 packet received over an IPv6 socket" check, even though it hadn't been in selinux_sctp_assoc_request() before. While such check is not necessary in selinux_inet_conn_request() (because struct request_sock's family field is already set according to the skb's family), here it is needed, as we don't have request_sock and we take the initial family from the socket. In selinux_sctp_assoc_established() it is similarly needed as well (and also selinux_inet_conn_established() already has it). Fixes: 72e89f50084c ("security: Add support for SCTP security hooks") Reported-by: Prashanth Prahlad Based-on-patch-by: Xin Long Reviewed-by: Xin Long Tested-by: Richard Haines Signed-off-by: Ondrej Mosnacek Signed-off-by: Paul Moore --- security/selinux/hooks.c | 90 +++++++++++++++++++++++++++++----------- 1 file changed, 66 insertions(+), 24 deletions(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index ab32303e66181..dafabb4dcc640 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -5238,37 +5238,38 @@ static void selinux_sock_graft(struct sock *sk, struct socket *parent) sksec->sclass = isec->sclass; } -/* Called whenever SCTP receives an INIT chunk. 
This happens when an incoming - * connect(2), sctp_connectx(3) or sctp_sendmsg(3) (with no association - * already present). +/* + * Determines peer_secid for the asoc and updates socket's peer label + * if it's the first association on the socket. */ -static int selinux_sctp_assoc_request(struct sctp_association *asoc, - struct sk_buff *skb) +static int selinux_sctp_process_new_assoc(struct sctp_association *asoc, + struct sk_buff *skb) { - struct sk_security_struct *sksec = asoc->base.sk->sk_security; + struct sock *sk = asoc->base.sk; + u16 family = sk->sk_family; + struct sk_security_struct *sksec = sk->sk_security; struct common_audit_data ad; struct lsm_network_audit net = {0,}; - u8 peerlbl_active; - u32 peer_sid = SECINITSID_UNLABELED; - u32 conn_sid; - int err = 0; + int err; - if (!selinux_policycap_extsockclass()) - return 0; + /* handle mapped IPv4 packets arriving via IPv6 sockets */ + if (family == PF_INET6 && skb->protocol == htons(ETH_P_IP)) + family = PF_INET; - peerlbl_active = selinux_peerlbl_enabled(); + if (selinux_peerlbl_enabled()) { + asoc->peer_secid = SECSID_NULL; - if (peerlbl_active) { /* This will return peer_sid = SECSID_NULL if there are * no peer labels, see security_net_peersid_resolve(). */ - err = selinux_skb_peerlbl_sid(skb, asoc->base.sk->sk_family, - &peer_sid); + err = selinux_skb_peerlbl_sid(skb, family, &asoc->peer_secid); if (err) return err; - if (peer_sid == SECSID_NULL) - peer_sid = SECINITSID_UNLABELED; + if (asoc->peer_secid == SECSID_NULL) + asoc->peer_secid = SECINITSID_UNLABELED; + } else { + asoc->peer_secid = SECINITSID_UNLABELED; } if (sksec->sctp_assoc_state == SCTP_ASSOC_UNSET) { @@ -5279,8 +5280,8 @@ static int selinux_sctp_assoc_request(struct sctp_association *asoc, * then it is approved by policy and used as the primary * peer SID for getpeercon(3). 
*/ - sksec->peer_sid = peer_sid; - } else if (sksec->peer_sid != peer_sid) { + sksec->peer_sid = asoc->peer_secid; + } else if (sksec->peer_sid != asoc->peer_secid) { /* Other association peer SIDs are checked to enforce * consistency among the peer SIDs. */ @@ -5288,11 +5289,32 @@ static int selinux_sctp_assoc_request(struct sctp_association *asoc, ad.u.net = &net; ad.u.net->sk = asoc->base.sk; err = avc_has_perm(&selinux_state, - sksec->peer_sid, peer_sid, sksec->sclass, - SCTP_SOCKET__ASSOCIATION, &ad); + sksec->peer_sid, asoc->peer_secid, + sksec->sclass, SCTP_SOCKET__ASSOCIATION, + &ad); if (err) return err; } + return 0; +} + +/* Called whenever SCTP receives an INIT or COOKIE ECHO chunk. This + * happens on an incoming connect(2), sctp_connectx(3) or + * sctp_sendmsg(3) (with no association already present). + */ +static int selinux_sctp_assoc_request(struct sctp_association *asoc, + struct sk_buff *skb) +{ + struct sk_security_struct *sksec = asoc->base.sk->sk_security; + u32 conn_sid; + int err; + + if (!selinux_policycap_extsockclass()) + return 0; + + err = selinux_sctp_process_new_assoc(asoc, skb); + if (err) + return err; /* Compute the MLS component for the connection and store * the information in asoc. This will be used by SCTP TCP type @@ -5300,17 +5322,36 @@ static int selinux_sctp_assoc_request(struct sctp_association *asoc, * socket to be generated. selinux_sctp_sk_clone() will then * plug this into the new socket. */ - err = selinux_conn_sid(sksec->sid, peer_sid, &conn_sid); + err = selinux_conn_sid(sksec->sid, asoc->peer_secid, &conn_sid); if (err) return err; asoc->secid = conn_sid; - asoc->peer_secid = peer_sid; /* Set any NetLabel labels including CIPSO/CALIPSO options. */ return selinux_netlbl_sctp_assoc_request(asoc, skb); } +/* Called when SCTP receives a COOKIE ACK chunk as the final + * response to an association request (initited by us). 
+ */ +static int selinux_sctp_assoc_established(struct sctp_association *asoc, + struct sk_buff *skb) +{ + struct sk_security_struct *sksec = asoc->base.sk->sk_security; + + if (!selinux_policycap_extsockclass()) + return 0; + + /* Inherit secid from the parent socket - this will be picked up + * by selinux_sctp_sk_clone() if the association gets peeled off + * into a new socket. + */ + asoc->secid = sksec->sid; + + return selinux_sctp_process_new_assoc(asoc, skb); +} + /* Check if sctp IPv4/IPv6 addresses are valid for binding or connecting * based on their @optname. */ @@ -7131,6 +7172,7 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(sctp_assoc_request, selinux_sctp_assoc_request), LSM_HOOK_INIT(sctp_sk_clone, selinux_sctp_sk_clone), LSM_HOOK_INIT(sctp_bind_connect, selinux_sctp_bind_connect), + LSM_HOOK_INIT(sctp_assoc_established, selinux_sctp_assoc_established), LSM_HOOK_INIT(inet_conn_request, selinux_inet_conn_request), LSM_HOOK_INIT(inet_csk_clone, selinux_inet_csk_clone), LSM_HOOK_INIT(inet_conn_established, selinux_inet_conn_established), -- GitLab From f122d103b564e5fb7c82de902c6f8f6cbdf50ec9 Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Sun, 13 Feb 2022 16:59:02 +0800 Subject: [PATCH 0435/1586] blk-cgroup: set blkg iostat after percpu stat aggregation Don't need to do blkg_iostat_set for top blkg iostat on each CPU, so move it after percpu stat aggregation. 
Fixes: ef45fe470e1e ("blk-cgroup: show global disk stats in root cgroup io.stat") Signed-off-by: Chengming Zhou Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20220213085902.88884-1-zhouchengming@bytedance.com Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 4108d445c73af..fa063c6c0338e 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -856,11 +856,11 @@ static void blkcg_fill_root_iostats(void) blk_queue_root_blkg(bdev_get_queue(bdev)); struct blkg_iostat tmp; int cpu; + unsigned long flags; memset(&tmp, 0, sizeof(tmp)); for_each_possible_cpu(cpu) { struct disk_stats *cpu_dkstats; - unsigned long flags; cpu_dkstats = per_cpu_ptr(bdev->bd_stats, cpu); tmp.ios[BLKG_IOSTAT_READ] += @@ -876,11 +876,11 @@ static void blkcg_fill_root_iostats(void) cpu_dkstats->sectors[STAT_WRITE] << 9; tmp.bytes[BLKG_IOSTAT_DISCARD] += cpu_dkstats->sectors[STAT_DISCARD] << 9; - - flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync); - blkg_iostat_set(&blkg->iostat.cur, &tmp); - u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); } + + flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync); + blkg_iostat_set(&blkg->iostat.cur, &tmp); + u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); } } -- GitLab From b62a8486de3ab1d7c2353ec422b9cca3abfcfbcd Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 31 Jan 2022 16:54:52 +0000 Subject: [PATCH 0436/1586] elfcore: Replace CONFIG_{IA64, UML} checks with a new option As arm64 is about to introduce MTE-specific phdrs in the core dump, add a common CONFIG_ARCH_BINFMT_ELF_EXTRA_PHDRS option currently selectable by UML_X86 and IA64. 
Signed-off-by: Catalin Marinas Cc: Eric Biederman Link: https://lore.kernel.org/r/20220131165456.2160675-2-catalin.marinas@arm.com Signed-off-by: Will Deacon --- arch/ia64/Kconfig | 1 + arch/x86/um/Kconfig | 1 + fs/Kconfig.binfmt | 3 +++ include/linux/elfcore.h | 4 ++-- 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index a7e01573abd83..e003b2473c64d 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -8,6 +8,7 @@ menu "Processor type and features" config IA64 bool + select ARCH_BINFMT_ELF_EXTRA_PHDRS select ARCH_HAS_DMA_MARK_CLEAN select ARCH_HAS_STRNCPY_FROM_USER select ARCH_HAS_STRNLEN_USER diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index 40d6a06e41c81..ead7e5b3a9757 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig @@ -8,6 +8,7 @@ endmenu config UML_X86 def_bool y + select ARCH_BINFMT_ELF_EXTRA_PHDRS if X86_32 config 64BIT bool "64-bit kernel" if "$(SUBARCH)" = "x86" diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 4d5ae61580aae..68e5862837649 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -36,6 +36,9 @@ config COMPAT_BINFMT_ELF config ARCH_BINFMT_ELF_STATE bool +config ARCH_BINFMT_ELF_EXTRA_PHDRS + bool + config ARCH_HAVE_ELF_PROT bool diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 746e081879a5a..f8e206e82476c 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -114,7 +114,7 @@ static inline int elf_core_copy_task_fpregs(struct task_struct *t, struct pt_reg #endif } -#if (defined(CONFIG_UML) && defined(CONFIG_X86_32)) || defined(CONFIG_IA64) +#ifdef CONFIG_ARCH_BINFMT_ELF_EXTRA_PHDRS /* * These functions parameterize elf_core_dump in fs/binfmt_elf.c to write out * extra segments containing the gate DSO contents. 
Dumping its @@ -149,6 +149,6 @@ static inline size_t elf_core_extra_data_size(void) { return 0; } -#endif +#endif /* CONFIG_ARCH_BINFMT_ELF_EXTRA_PHDRS */ #endif /* _LINUX_ELFCORE_H */ -- GitLab From 761b9b366cec0c81a1cd80930f00611d86521d1b Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 31 Jan 2022 16:54:53 +0000 Subject: [PATCH 0437/1586] elf: Introduce the ARM MTE ELF segment type Memory tags will be dumped in the core file as segments with their own type. Discussions with the binutils and the generic ABI community settled on using new definitions in the PT_*PROC space (and to be documented in the processor-specific ABIs). Introduce PT_ARM_MEMTAG_MTE as (PT_LOPROC + 0x1). Not included in this patch since there is no upstream support but the CHERI/BSD community will also reserve: #define PT_ARM_MEMTAG_CHERI (PT_LOPROC + 0x2) #define PT_RISCV_MEMTAG_CHERI (PT_LOPROC + 0x3) Signed-off-by: Catalin Marinas Acked-by: Luis Machado Link: https://lore.kernel.org/r/20220131165456.2160675-3-catalin.marinas@arm.com Signed-off-by: Will Deacon --- include/uapi/linux/elf.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 61bf4774b8f2a..fe8e5b74cb39e 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -40,6 +40,9 @@ typedef __s64 Elf64_Sxword; #define PT_GNU_STACK (PT_LOOS + 0x474e551) +/* ARM MTE memory tag segment type */ +#define PT_ARM_MEMTAG_MTE (PT_LOPROC + 0x1) + /* * Extended Numbering * -- GitLab From ab1e435ca7913e384ed801210418633eee43a71b Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 31 Jan 2022 16:54:54 +0000 Subject: [PATCH 0438/1586] arm64: mte: Define the number of bytes for storing the tags in a page Rather than explicitly calculating the number of bytes for a compact tag storage format corresponding to a page, just add a MTE_PAGE_TAG_STORAGE macro. With the current MTE implementation of 4 bits per tag, we store 2 tags in a byte. 
Signed-off-by: Catalin Marinas Acked-by: Luis Machado Link: https://lore.kernel.org/r/20220131165456.2160675-4-catalin.marinas@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/mte-def.h | 1 + arch/arm64/lib/mte.S | 4 ++-- arch/arm64/mm/mteswap.c | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/mte-def.h b/arch/arm64/include/asm/mte-def.h index 626d359b396e5..14ee86b019c2e 100644 --- a/arch/arm64/include/asm/mte-def.h +++ b/arch/arm64/include/asm/mte-def.h @@ -11,6 +11,7 @@ #define MTE_TAG_SHIFT 56 #define MTE_TAG_SIZE 4 #define MTE_TAG_MASK GENMASK((MTE_TAG_SHIFT + (MTE_TAG_SIZE - 1)), MTE_TAG_SHIFT) +#define MTE_PAGE_TAG_STORAGE (MTE_GRANULES_PER_PAGE * MTE_TAG_SIZE / 8) #define __MTE_PREAMBLE ARM64_ASM_PREAMBLE ".arch_extension memtag\n" diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S index f531dcb95174a..8590af3c98c0b 100644 --- a/arch/arm64/lib/mte.S +++ b/arch/arm64/lib/mte.S @@ -134,7 +134,7 @@ SYM_FUNC_END(mte_copy_tags_to_user) /* * Save the tags in a page * x0 - page address - * x1 - tag storage + * x1 - tag storage, MTE_PAGE_TAG_STORAGE bytes */ SYM_FUNC_START(mte_save_page_tags) multitag_transfer_size x7, x5 @@ -158,7 +158,7 @@ SYM_FUNC_END(mte_save_page_tags) /* * Restore the tags in a page * x0 - page address - * x1 - tag storage + * x1 - tag storage, MTE_PAGE_TAG_STORAGE bytes */ SYM_FUNC_START(mte_restore_page_tags) multitag_transfer_size x7, x5 diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c index 7c4ef56265ee1..a9e50e930484a 100644 --- a/arch/arm64/mm/mteswap.c +++ b/arch/arm64/mm/mteswap.c @@ -12,7 +12,7 @@ static DEFINE_XARRAY(mte_pages); void *mte_allocate_tag_storage(void) { /* tags granule is 16 bytes, 2 tags stored per byte */ - return kmalloc(PAGE_SIZE / 16 / 2, GFP_KERNEL); + return kmalloc(MTE_PAGE_TAG_STORAGE, GFP_KERNEL); } void mte_free_tag_storage(char *storage) -- GitLab From 6dd8b1a0b6cb3ed93d24110e02e67ff9d006610a Mon Sep 17 00:00:00 2001 From: Catalin 
Marinas Date: Mon, 31 Jan 2022 16:54:55 +0000 Subject: [PATCH 0439/1586] arm64: mte: Dump the MTE tags in the core file For each vma mapped with PROT_MTE (the VM_MTE flag set), generate a PT_ARM_MEMTAG_MTE segment in the core file and dump the corresponding tags. The in-file size for such segments is 128 bytes per page. For pages in a VM_MTE vma which are not present in the user page tables or don't have the PG_mte_tagged flag set (e.g. execute-only), just write zeros in the core file. An example of program headers for two vmas, one 2-page, the other 4-page long: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align ... LOAD 0x030000 0x0000ffff80034000 0x0000000000000000 0x000000 0x002000 RW 0x1000 LOAD 0x030000 0x0000ffff80036000 0x0000000000000000 0x004000 0x004000 RW 0x1000 ... LOPROC+0x1 0x05b000 0x0000ffff80034000 0x0000000000000000 0x000100 0x002000 0 LOPROC+0x1 0x05b100 0x0000ffff80036000 0x0000000000000000 0x000200 0x004000 0 Signed-off-by: Catalin Marinas Acked-by: Luis Machado Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20220131165456.2160675-5-catalin.marinas@arm.com Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 + arch/arm64/kernel/Makefile | 1 + arch/arm64/kernel/elfcore.c | 123 ++++++++++++++++++++++++++++++++++++ 3 files changed, 125 insertions(+) create mode 100644 arch/arm64/kernel/elfcore.c diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index cbcd42decb2ad..b55c11796fad2 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -10,6 +10,7 @@ config ARM64 select ACPI_SPCR_TABLE if ACPI select ACPI_PPTT if ACPI select ARCH_HAS_DEBUG_WX + select ARCH_BINFMT_ELF_EXTRA_PHDRS select ARCH_BINFMT_ELF_STATE select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 88b3e2a214084..986837d7ec82d 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -61,6 +61,7 @@ 
obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o obj-$(CONFIG_PARAVIRT) += paravirt.o obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o +obj-$(CONFIG_ELF_CORE) += elfcore.o obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o \ cpu-reset.o obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_image.o diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c new file mode 100644 index 0000000000000..3455ee4acc04c --- /dev/null +++ b/arch/arm64/kernel/elfcore.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include +#include +#include + +#include +#include + +#define for_each_mte_vma(tsk, vma) \ + if (system_supports_mte()) \ + for (vma = tsk->mm->mmap; vma; vma = vma->vm_next) \ + if (vma->vm_flags & VM_MTE) + +static unsigned long mte_vma_tag_dump_size(struct vm_area_struct *vma) +{ + if (vma->vm_flags & VM_DONTDUMP) + return 0; + + return vma_pages(vma) * MTE_PAGE_TAG_STORAGE; +} + +/* Derived from dump_user_range(); start/end must be page-aligned */ +static int mte_dump_tag_range(struct coredump_params *cprm, + unsigned long start, unsigned long end) +{ + unsigned long addr; + + for (addr = start; addr < end; addr += PAGE_SIZE) { + char tags[MTE_PAGE_TAG_STORAGE]; + struct page *page = get_dump_page(addr); + + /* + * get_dump_page() returns NULL when encountering an empty + * page table entry that would otherwise have been filled with + * the zero page. Skip the equivalent tag dump which would + * have been all zeros. + */ + if (!page) { + dump_skip(cprm, MTE_PAGE_TAG_STORAGE); + continue; + } + + /* + * Pages mapped in user space as !pte_access_permitted() (e.g. + * PROT_EXEC only) may not have the PG_mte_tagged flag set. 
+ */ + if (!test_bit(PG_mte_tagged, &page->flags)) { + put_page(page); + dump_skip(cprm, MTE_PAGE_TAG_STORAGE); + continue; + } + + mte_save_page_tags(page_address(page), tags); + put_page(page); + if (!dump_emit(cprm, tags, MTE_PAGE_TAG_STORAGE)) + return 0; + } + + return 1; +} + +Elf_Half elf_core_extra_phdrs(void) +{ + struct vm_area_struct *vma; + int vma_count = 0; + + for_each_mte_vma(current, vma) + vma_count++; + + return vma_count; +} + +int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) +{ + struct vm_area_struct *vma; + + for_each_mte_vma(current, vma) { + struct elf_phdr phdr; + + phdr.p_type = PT_ARM_MEMTAG_MTE; + phdr.p_offset = offset; + phdr.p_vaddr = vma->vm_start; + phdr.p_paddr = 0; + phdr.p_filesz = mte_vma_tag_dump_size(vma); + phdr.p_memsz = vma->vm_end - vma->vm_start; + offset += phdr.p_filesz; + phdr.p_flags = 0; + phdr.p_align = 0; + + if (!dump_emit(cprm, &phdr, sizeof(phdr))) + return 0; + } + + return 1; +} + +size_t elf_core_extra_data_size(void) +{ + struct vm_area_struct *vma; + size_t data_size = 0; + + for_each_mte_vma(current, vma) + data_size += mte_vma_tag_dump_size(vma); + + return data_size; +} + +int elf_core_write_extra_data(struct coredump_params *cprm) +{ + struct vm_area_struct *vma; + + for_each_mte_vma(current, vma) { + if (vma->vm_flags & VM_DONTDUMP) + continue; + + if (!mte_dump_tag_range(cprm, vma->vm_start, vma->vm_end)) + return 0; + } + + return 1; +} -- GitLab From 731451ab3c0c6fe88142dbc73a74c71bd92a5cff Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 31 Jan 2022 16:54:56 +0000 Subject: [PATCH 0440/1586] arm64: mte: Document the core dump file format Add the program header definition and data layout for the PT_ARM_MEMTAG_MTE segments. 
Signed-off-by: Catalin Marinas Acked-by: Luis Machado Link: https://lore.kernel.org/r/20220131165456.2160675-6-catalin.marinas@arm.com Signed-off-by: Will Deacon --- .../arm64/memory-tagging-extension.rst | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/Documentation/arm64/memory-tagging-extension.rst b/Documentation/arm64/memory-tagging-extension.rst index 7b99c8f428eb6..5a70d7a3ca129 100644 --- a/Documentation/arm64/memory-tagging-extension.rst +++ b/Documentation/arm64/memory-tagging-extension.rst @@ -213,6 +213,29 @@ address ABI control and MTE configuration of a process as per the Documentation/arm64/tagged-address-abi.rst and above. The corresponding ``regset`` is 1 element of 8 bytes (``sizeof(long))``). +Core dump support +----------------- + +The allocation tags for user memory mapped with ``PROT_MTE`` are dumped +in the core file as additional ``PT_ARM_MEMTAG_MTE`` segments. The +program header for such segment is defined as: + +:``p_type``: ``PT_ARM_MEMTAG_MTE`` +:``p_flags``: 0 +:``p_offset``: segment file offset +:``p_vaddr``: segment virtual address, same as the corresponding + ``PT_LOAD`` segment +:``p_paddr``: 0 +:``p_filesz``: segment size in file, calculated as ``p_memsz / 32`` + (two 4-bit tags cover 32 bytes of memory) +:``p_memsz``: segment size in memory, same as the corresponding + ``PT_LOAD`` segment +:``p_align``: 0 + +The tags are stored in the core file at ``p_offset`` as two 4-bit tags +in a byte. With the tag granule of 16 bytes, a 4K page requires 128 +bytes in the core file. + Example of correct usage ======================== -- GitLab From a693396fd569c0a33e4b0f82398fbd6410a7beec Mon Sep 17 00:00:00 2001 From: Frank Rowand Date: Thu, 10 Feb 2022 17:36:30 -0600 Subject: [PATCH 0441/1586] Documentation: dev-tools: clarify KTAP specification wording Add the spec version to the title line. Explain likely source of "Unknown lines". "Unknown lines" in nested tests are optionally indented. 
Add "Unknown lines" items to differences between TAP & KTAP list Convert "Major differences between TAP and KTAP" from a bullet list to a table. The bullet list was being formatted as a single paragraph. Reviewed-by: Tim Bird Reviewed-by: David Gow Reviewed-by: Shuah Khan Signed-off-by: Frank Rowand Reviewed-by: Brendan Higgins Link: https://lore.kernel.org/r/20220210233630.3304495-1-frowand.list@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/dev-tools/ktap.rst | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/Documentation/dev-tools/ktap.rst b/Documentation/dev-tools/ktap.rst index 878530cb9c271..d7fe05de40b44 100644 --- a/Documentation/dev-tools/ktap.rst +++ b/Documentation/dev-tools/ktap.rst @@ -1,8 +1,8 @@ .. SPDX-License-Identifier: GPL-2.0 -======================================== -The Kernel Test Anything Protocol (KTAP) -======================================== +=================================================== +The Kernel Test Anything Protocol (KTAP), version 1 +=================================================== TAP, or the Test Anything Protocol is a format for specifying test results used by a number of projects. It's website and specification are found at this `link @@ -174,6 +174,13 @@ There may be lines within KTAP output that do not follow the format of one of the four formats for lines described above. This is allowed, however, they will not influence the status of the tests. +This is an important difference from TAP. Kernel tests may print messages +to the system console or a log file. Both of these destinations may contain +messages either from unrelated kernel or userspace activity, or kernel +messages from non-test code that is invoked by the test. The kernel code +invoked by the test likely is not aware that a test is in progress and +thus can not print the message as a diagnostic message. 
+ Nested tests ------------ @@ -186,10 +193,13 @@ starting with another KTAP version line and test plan, and end with the overall result. If one of the subtests fail, for example, the parent test should also fail. -Additionally, all result lines in a subtest should be indented. One level of +Additionally, all lines in a subtest should be indented. One level of indentation is two spaces: " ". The indentation should begin at the version line and should end before the parent test's result line. +"Unknown lines" are not considered to be lines in a subtest and thus are +allowed to be either indented or not indented. + An example of a test with two nested subtests: .. code-block:: @@ -224,10 +234,15 @@ An example format with multiple levels of nested testing: Major differences between TAP and KTAP -------------------------------------- -Note the major differences between the TAP and KTAP specification: -- yaml and json are not recommended in diagnostic messages -- TODO directive not recognized -- KTAP allows for an arbitrary number of tests to be nested +================================================== ========= =============== +Feature TAP KTAP +================================================== ========= =============== +yaml and json in diagnostic message ok not recommended +TODO directive ok not recognized +allows an arbitrary number of tests to be nested no yes +"Unknown lines" are in category of "Anything else" yes no +"Unknown lines" are incorrect allowed +================================================== ========= =============== The TAP14 specification does permit nested tests, but instead of using another nested version line, uses a line of the form -- GitLab From 013ebb6d822a51ccc8ad162e7c8d02fbd7e90dd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Thu, 10 Feb 2022 20:22:00 +0100 Subject: [PATCH 0442/1586] Documentation: Fix links for udftools project and pktcdvd tool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Signed-off-by: Pali Rohár Link: https://lore.kernel.org/r/20220210192200.30828-1-pali@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/cdrom/packet-writing.rst | 4 ++-- Documentation/userspace-api/ioctl/ioctl-number.rst | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/cdrom/packet-writing.rst b/Documentation/cdrom/packet-writing.rst index c5c957195a5a7..43db58c50d292 100644 --- a/Documentation/cdrom/packet-writing.rst +++ b/Documentation/cdrom/packet-writing.rst @@ -11,7 +11,7 @@ Getting started quick - Compile and install kernel and modules, reboot. - You need the udftools package (pktsetup, mkudffs, cdrwtool). - Download from http://sourceforge.net/projects/linux-udf/ + Download from https://github.com/pali/udftools - Grab a new CD-RW disc and format it (assuming CD-RW is hdc, substitute as appropriate):: @@ -102,7 +102,7 @@ Using the pktcdvd sysfs interface Since Linux 2.6.20, the pktcdvd module has a sysfs interface and can be controlled by it. For example the "pktcdvd" tool uses -this interface. (see http://tom.ist-im-web.de/download/pktcdvd ) +this interface. (see http://tom.ist-im-web.de/linux/software/pktcdvd ) "pktcdvd" works similar to "pktsetup", e.g.:: diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst index 687efcf245c13..559c436276c6a 100644 --- a/Documentation/userspace-api/ioctl/ioctl-number.rst +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst @@ -253,7 +253,7 @@ Code Seq# Include File Comments 'l' 00-3F linux/tcfs_fs.h transparent cryptographic file system 'l' 40-7F linux/udf_fs_i.h in development: - + 'm' 00-09 linux/mmtimer.h conflict! 'm' all linux/mtio.h conflict! 'm' all linux/soundcard.h conflict! 
-- GitLab From 339cf5a2c6fb8559f30b9d4bd82c1dc4d7a16468 Mon Sep 17 00:00:00 2001 From: Tang Yizhou Date: Tue, 8 Feb 2022 21:37:16 +0800 Subject: [PATCH 0443/1586] docs/zh_CN: Add energy-model Chinese translation Translate power/energy-model.rst into Chinese. Signed-off-by: Tang Yizhou Reviewed-by: Alex Shi Link: https://lore.kernel.org/r/20220208133716.24070-1-tangyizhou@huawei.com Signed-off-by: Jonathan Corbet --- .../translations/zh_CN/power/energy-model.rst | 190 ++++++++++++++++++ .../translations/zh_CN/power/index.rst | 2 +- 2 files changed, 191 insertions(+), 1 deletion(-) create mode 100644 Documentation/translations/zh_CN/power/energy-model.rst diff --git a/Documentation/translations/zh_CN/power/energy-model.rst b/Documentation/translations/zh_CN/power/energy-model.rst new file mode 100644 index 0000000000000..c7da1b6aefeee --- /dev/null +++ b/Documentation/translations/zh_CN/power/energy-model.rst @@ -0,0 +1,190 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/power/energy-model.rst + +:翻译: + + 唐艺舟 Tang Yizhou + +============ +设备能量模型 +============ + +1. 
概述 +------- + +能量模型(EM)框架是一种驱动程序与内核子系统之间的接口。其中驱动程序了解不同 +性能层级的设备所消耗的功率,而内核子系统愿意使用该信息做出能量感知决策。 + +设备所消耗的功率的信息来源在不同的平台上可能有很大的不同。这些功率成本在某些 +情况下可以使用设备树数据来估算。在其它情况下,固件会更清楚。或者,用户空间可能 +是最清楚的。以此类推。为了避免每一个客户端子系统对每一种可能的信息源自己重新 +实现支持,EM框架作为一个抽象层介入,它在内核中对功率成本表的格式进行标准化, +因此能够避免多余的工作。 + +功率值可以用毫瓦或“抽象刻度”表示。多个子系统可能使用EM,由系统集成商来检查 +功率值刻度类型的要求是否满足。可以在能量感知调度器的文档中找到一个例子 +Documentation/scheduler/sched-energy.rst。对于一些子系统,比如热能或 +powercap,用“抽象刻度”描述功率值可能会导致问题。这些子系统对过去使用的功率的 +估算值更感兴趣,因此可能需要真实的毫瓦。这些要求的一个例子可以在智能功率分配 +Documentation/driver-api/thermal/power_allocator.rst文档中找到。 + +内核子系统可能(基于EM内部标志位)实现了对EM注册设备是否具有不一致刻度的自动 +检查。要记住的重要事情是,当功率值以“抽象刻度”表示时,从中推导以毫焦耳为单位 +的真实能量消耗是不可能的。 + +下图描述了一个驱动的例子(这里是针对Arm的,但该方法适用于任何体系结构),它 +向EM框架提供了功率成本,感兴趣的客户端可从中读取数据:: + + +---------------+ +-----------------+ +---------------+ + | Thermal (IPA) | | Scheduler (EAS) | | Other | + +---------------+ +-----------------+ +---------------+ + | | em_cpu_energy() | + | | em_cpu_get() | + +---------+ | +---------+ + | | | + v v v + +---------------------+ + | Energy Model | + | Framework | + +---------------------+ + ^ ^ ^ + | | | em_dev_register_perf_domain() + +----------+ | +---------+ + | | | + +---------------+ +---------------+ +--------------+ + | cpufreq-dt | | arm_scmi | | Other | + +---------------+ +---------------+ +--------------+ + ^ ^ ^ + | | | + +--------------+ +---------------+ +--------------+ + | Device Tree | | Firmware | | ? | + +--------------+ +---------------+ +--------------+ + +对于CPU设备,EM框架管理着系统中每个“性能域”的功率成本表。一个性能域是一组 +性能一起伸缩的CPU。性能域通常与CPUFreq策略具有1对1映射。一个性能域中的 +所有CPU要求具有相同的微架构。不同性能域中的CPU可以有不同的微架构。 + + +2. 
核心API +---------- + +2.1 配置选项 +^^^^^^^^^^^^ + +必须使能CONFIG_ENERGY_MODEL才能使用EM框架。 + + +2.2 性能域的注册 +^^^^^^^^^^^^^^^^ + +“高级”EM的注册 +~~~~~~~~~~~~~~~~ + +“高级”EM因它允许驱动提供更精确的功率模型而得名。它并不受限于框架中的一些已 +实现的数学公式(就像“简单”EM那样)。它可以更好地反映每个性能状态的实际功率 +测量。因此,在EM静态功率(漏电流功率)是重要的情况下,应该首选这种注册方式。 + +驱动程序应通过以下API将性能域注册到EM框架中:: + + int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, + struct em_data_callback *cb, cpumask_t *cpus, bool milliwatts); + +驱动程序必须提供一个回调函数,为每个性能状态返回<频率,功率>元组。驱动程序 +提供的回调函数可以自由地从任何相关位置(DT、固件......)以及以任何被认为是 +必要的方式获取数据。只有对于CPU设备,驱动程序必须使用cpumask指定性能域的CPU。 +对于CPU以外的其他设备,最后一个参数必须被设置为NULL。 + +最后一个参数“milliwatts”(毫瓦)设置成正确的值是很重要的,使用EM的内核 +子系统可能会依赖这个标志来检查所有的EM设备是否使用相同的刻度。如果有不同的 +刻度,这些子系统可能决定:返回警告/错误,停止工作或崩溃(panic)。 + +关于实现这个回调函数的驱动程序的例子,参见第3节。或者在第2.4节阅读这个API +的更多文档。 + + +“简单”EM的注册 +~~~~~~~~~~~~~~~~ + +“简单”EM是用框架的辅助函数cpufreq_register_em_with_opp()注册的。它实现了 +一个和以下数学公式紧密相关的功率模型:: + + Power = C * V^2 * f + +使用这种方法注册的EM可能无法正确反映真实设备的物理特性,例如当静态功率 +(漏电流功率)很重要时。 + + +2.3 访问性能域 +^^^^^^^^^^^^^^ + +有两个API函数提供对能量模型的访问。em_cpu_get()以CPU id为参数,em_pd_get() +以设备指针为参数。使用哪个接口取决于子系统,但对于CPU设备来说,这两个函数都返 +回相同的性能域。 + +对CPU的能量模型感兴趣的子系统可以通过em_cpu_get() API检索它。在创建性能域时 +分配一次能量模型表,它保存在内存中不被修改。 + +一个性能域所消耗的能量可以使用em_cpu_energy() API来估算。该估算假定CPU设备 +使用的CPUfreq监管器是schedutil。当前该计算不能提供给其它类型的设备。 + +关于上述API的更多细节可以在 ```` 或第2.4节中找到。 + + +2.4 API的细节描述 +^^^^^^^^^^^^^^^^^ +参见 include/linux/energy_model.h 和 kernel/power/energy_model.c 的kernel doc。 + +3. 
驱动示例 +----------- + +CPUFreq框架支持专用的回调函数,用于为指定的CPU(们)注册EM: +cpufreq_driver::register_em()。这个回调必须为每个特定的驱动程序正确实现, +因为框架会在设置过程中适时地调用它。本节提供了一个简单的例子,展示CPUFreq驱动 +在能量模型框架中使用(假的)“foo”协议注册性能域。该驱动实现了一个est_power() +函数提供给EM框架:: + + -> drivers/cpufreq/foo_cpufreq.c + + 01 static int est_power(unsigned long *mW, unsigned long *KHz, + 02 struct device *dev) + 03 { + 04 long freq, power; + 05 + 06 /* 使用“foo”协议设置频率上限 */ + 07 freq = foo_get_freq_ceil(dev, *KHz); + 08 if (freq < 0) + 09 return freq; + 10 + 11 /* 估算相关频率下设备的功率成本 */ + 12 power = foo_estimate_power(dev, freq); + 13 if (power < 0) + 14 return power; + 15 + 16 /* 将这些值返回给EM框架 */ + 17 *mW = power; + 18 *KHz = freq; + 19 + 20 return 0; + 21 } + 22 + 23 static void foo_cpufreq_register_em(struct cpufreq_policy *policy) + 24 { + 25 struct em_data_callback em_cb = EM_DATA_CB(est_power); + 26 struct device *cpu_dev; + 27 int nr_opp; + 28 + 29 cpu_dev = get_cpu_device(cpumask_first(policy->cpus)); + 30 + 31 /* 查找该策略支持的OPP数量 */ + 32 nr_opp = foo_get_nr_opp(policy); + 33 + 34 /* 并注册新的性能域 */ + 35 em_dev_register_perf_domain(cpu_dev, nr_opp, &em_cb, policy->cpus, + 36 true); + 37 } + 38 + 39 static struct cpufreq_driver foo_cpufreq_driver = { + 40 .register_em = foo_cpufreq_register_em, + 41 }; diff --git a/Documentation/translations/zh_CN/power/index.rst b/Documentation/translations/zh_CN/power/index.rst index ad80a9e80b7cd..bc54983ba5156 100644 --- a/Documentation/translations/zh_CN/power/index.rst +++ b/Documentation/translations/zh_CN/power/index.rst @@ -14,6 +14,7 @@ .. toctree:: :maxdepth: 1 + energy-model opp TODOList: @@ -22,7 +23,6 @@ TODOList: * basic-pm-debugging * charger-manager * drivers-testing - * energy-model * freezing-of-tasks * pci * pm_qos_interface -- GitLab From d535e6c25a8131324aef4534e14d09b4d81c98a9 Mon Sep 17 00:00:00 2001 From: Tang Yizhou Date: Tue, 8 Feb 2022 10:01:05 +0800 Subject: [PATCH 0444/1586] docs/zh_CN: Add sched-energy Chinese translation Translate scheduler/sched-energy.rst into Chinese. 
Signed-off-by: Tang Yizhou Reviewed-by: Alex Shi Reviewed-by: Yanteng Si Link: https://lore.kernel.org/r/20220208020105.14117-1-tangyizhou@huawei.com Signed-off-by: Jonathan Corbet --- .../translations/zh_CN/scheduler/index.rst | 4 +- .../zh_CN/scheduler/sched-energy.rst | 351 ++++++++++++++++++ 2 files changed, 353 insertions(+), 2 deletions(-) create mode 100644 Documentation/translations/zh_CN/scheduler/sched-energy.rst diff --git a/Documentation/translations/zh_CN/scheduler/index.rst b/Documentation/translations/zh_CN/scheduler/index.rst index f8f8f35d53c71..ab79259802668 100644 --- a/Documentation/translations/zh_CN/scheduler/index.rst +++ b/Documentation/translations/zh_CN/scheduler/index.rst @@ -5,6 +5,7 @@ :翻译: 司延腾 Yanteng Si + 唐艺舟 Tang Yizhou :校译: @@ -23,13 +24,12 @@ Linux调度器 sched-design-CFS sched-domains sched-capacity + sched-energy TODOList: - sched-bwc sched-deadline - sched-energy sched-nice-design sched-rt-group sched-stats diff --git a/Documentation/translations/zh_CN/scheduler/sched-energy.rst b/Documentation/translations/zh_CN/scheduler/sched-energy.rst new file mode 100644 index 0000000000000..fdbf6cfeea933 --- /dev/null +++ b/Documentation/translations/zh_CN/scheduler/sched-energy.rst @@ -0,0 +1,351 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/scheduler/sched-energy.rst + +:翻译: + + 唐艺舟 Tang Yizhou + +============ +能量感知调度 +============ + +1. 简介 +------- + +能量感知调度(EAS)使调度器有能力预测其决策对CPU所消耗的能量的影响。EAS依靠 +一个能量模型(EM)来为每个任务选择一个节能的CPU,同时最小化对吞吐率的影响。 +本文档致力于介绍EAS是如何工作的,它背后的主要设计决策是什么,以及使其运行 +所需的条件细节。 + +在进一步阅读之前,请注意,在撰写本文时:: + + /!\ EAS不支持对称CPU拓扑的平台 /!\ + +EAS只在异构CPU拓扑结构(如Arm大小核,big.LITTLE)上运行。因为在这种情况下, +通过调度来节约能量的潜力是最大的。 + +EAS实际使用的EM不是由调度器维护的,而是一个专门的框架。关于这个框架的细节和 +它提供的内容,请参考其文档(见Documentation/power/energy-model.rst)。 + + +2. 
背景和术语 +------------- + +从一开始就说清楚定义: + - 能量 = [焦耳] (比如供电设备上的电池提供的资源) + - 功率 = 能量/时间 = [焦耳/秒] = [瓦特] + + EAS的目标是最小化能量消耗,同时仍能将工作完成。也就是说,我们要最大化:: + + 性能 [指令数/秒] + ---------------- + 功率 [瓦特] + +它等效于最小化:: + + 能量 [焦耳] + ----------- + 指令数 + +同时仍然获得“良好”的性能。当前调度器只考虑性能目标,因此该式子本质上是一个 +可选的优化目标,它同时考虑了两个目标:能量效率和性能。 + +引入EM的想法是为了让调度器评估其决策的影响,而不是盲目地应用可能仅在部分 +平台有正面效果的节能技术。同时,EM必须尽可能的简单,以最小化调度器的时延 +影响。 + +简而言之,EAS改变了CFS任务分配给CPU的方式。当调度器决定一个任务应该在哪里 +运行时(在唤醒期间),EM被用来在不损害系统吞吐率的情况下,从几个较好的候选 +CPU中挑选一个经预测能量消耗最优的CPU。EAS的预测依赖于对平台拓扑结构特定元素 +的了解,包括CPU的“算力”,以及它们各自的能量成本。 + + +3. 拓扑信息 +----------- + +EAS(以及调度器的剩余部分)使用“算力”的概念来区分不同计算吞吐率的CPU。一个 +CPU的“算力”代表了它在最高频率下运行时能完成的工作量,且这个值是相对系统中 +算力最大的CPU而言的。算力值被归一化为1024以内,并且可与由实体负载跟踪 +(PELT)机制算出的利用率信号做对比。由于有算力值和利用率值,EAS能够估计一个 +任务/CPU有多大/有多忙,并在评估性能与能量时将其考虑在内。CPU算力由特定体系 +结构实现的arch_scale_cpu_capacity()回调函数提供。 + +EAS使用的其余平台信息是直接从能量模型(EM)框架中读取的。一个平台的EM是一张 +表,表中每项代表系统中一个“性能域”的功率成本。(若要了解更多关于性能域的细节, +见Documentation/power/energy-model.rst) + +当调度域被建立或重新建立时,调度器管理对拓扑代码中EM对象的引用。对于每个根域 +(rd),调度器维护一个与当前rd->span相交的所有性能域的单向链表。链表中的每个 +节点都包含一个指向EM框架所提供的结构体em_perf_domain的指针。 + +链表被附加在根域上,以应对独占的cpuset的配置。由于独占的cpuset的边界不一定与 +性能域的边界一致,不同根域的链表可能包含重复的元素。 + +示例1 + 让我们考虑一个有12个CPU的平台,分成3个性能域,(pd0,pd4和pd8),按以下 + 方式组织:: + + CPUs: 0 1 2 3 4 5 6 7 8 9 10 11 + PDs: |--pd0--|--pd4--|---pd8---| + RDs: |----rd1----|-----rd2-----| + + 现在,考虑用户空间决定将系统分成两个独占的cpusets,因此创建了两个独立的根域, + 每个根域包含6个CPU。这两个根域在上图中被表示为rd1和rd2。由于pd4与rd1和rd2 + 都有交集,它将同时出现于附加在这两个根域的“->pd”链表中: + + * rd1->pd: pd0 -> pd4 + * rd2->pd: pd4 -> pd8 + + 请注意,调度器将为pd4创建两个重复的链表节点(每个链表中各一个)。然而这 + 两个节点持有指向同一个EM框架的共享数据结构的指针。 + +由于对这些链表的访问可能与热插拔及其它事件并发发生,因此它们受RCU锁保护,就像 +被调度器操控的拓扑结构体中剩下字段一样。 + +EAS同样维护了一个静态键(sched_energy_present),当至少有一个根域满足EAS +启动的所有条件时,这个键就会被启动。在第6节中总结了这些条件。 + + +4. 
能量感知任务放置 +------------------- + +EAS覆盖了CFS的任务唤醒平衡代码。在唤醒平衡时,它使用平台的EM和PELT信号来选择节能 +的目标CPU。当EAS被启用时,select_task_rq_fair()调用find_energy_efficient_cpu() +来做任务放置决定。这个函数在每个性能域中寻找具有最高剩余算力(CPU算力 - CPU +利用率)的CPU,因为它能让我们保持最低的频率。然后,该函数检查将任务放在新CPU相较 +依然放在之前活动的prev_cpu是否可以节省能量。 + +如果唤醒的任务被迁移,find_energy_efficient_cpu()使用compute_energy()来估算 +系统将消耗多少能量。compute_energy()检查各CPU当前的利用率情况,并尝试调整来 +“模拟”任务迁移。EM框架提供了API em_pd_energy()计算每个性能域在给定的利用率条件 +下的预期能量消耗。 + +下面详细介绍一个优化能量消耗的任务放置决策的例子。 + +示例2 + 让我们考虑一个有两个独立性能域的(伪)平台,每个性能域含有2个CPU。CPU0和CPU1 + 是小核,CPU2和CPU3是大核。 + + 调度器必须决定将任务P放在哪个CPU上,这个任务的util_avg = 200(平均利用率), + prev_cpu = 0(上一次运行在CPU0)。 + + 目前CPU的利用率情况如下图所示。CPU 0-3的util_avg分别为400、100、600和500。 + 每个性能域有三个操作性能值(OPP)。与每个OPP相关的CPU算力和功率成本列在能量 + 模型表中。P的util_avg在图中显示为"PP":: + + CPU util. + 1024 - - - - - - - Energy Model + +-----------+-------------+ + | Little | Big | + 768 ============= +-----+-----+------+------+ + | Cap | Pwr | Cap | Pwr | + +-----+-----+------+------+ + 512 =========== - ##- - - - - | 170 | 50 | 512 | 400 | + ## ## | 341 | 150 | 768 | 800 | + 341 -PP - - - - ## ## | 512 | 300 | 1024 | 1700 | + PP ## ## +-----+-----+------+------+ + 170 -## - - - - ## ## + ## ## ## ## + ------------ ------------- + CPU0 CPU1 CPU2 CPU3 + + Current OPP: ===== Other OPP: - - - util_avg (100 each): ## + + + find_energy_efficient_cpu()将首先在两个性能域中寻找具有最大剩余算力的CPU。 + 在这个例子中是CPU1和CPU3。然后,它将估算,当P被放在它们中的任意一个时,系统的 + 能耗,并检查这样做是否会比把P放在CPU0上节省一些能量。EAS假定OPPs遵循利用率 + (这与CPUFreq监管器schedutil的行为一致。关于这个问题的更多细节,见第6节)。 + + **情况1. P被迁移到CPU1**:: + + 1024 - - - - - - - + + Energy calculation: + 768 ============= * CPU0: 200 / 341 * 150 = 88 + * CPU1: 300 / 341 * 150 = 131 + * CPU2: 600 / 768 * 800 = 625 + 512 - - - - - - - ##- - - - - * CPU3: 500 / 768 * 800 = 520 + ## ## => total_energy = 1364 + 341 =========== ## ## + PP ## ## + 170 -## - - PP- ## ## + ## ## ## ## + ------------ ------------- + CPU0 CPU1 CPU2 CPU3 + + + **情况2. 
P被迁移到CPU3**:: + + 1024 - - - - - - - + + Energy calculation: + 768 ============= * CPU0: 200 / 341 * 150 = 88 + * CPU1: 100 / 341 * 150 = 43 + PP * CPU2: 600 / 768 * 800 = 625 + 512 - - - - - - - ##- - -PP - * CPU3: 700 / 768 * 800 = 729 + ## ## => total_energy = 1485 + 341 =========== ## ## + ## ## + 170 -## - - - - ## ## + ## ## ## ## + ------------ ------------- + CPU0 CPU1 CPU2 CPU3 + + **情况3. P依旧留在prev_cpu/CPU0**:: + + 1024 - - - - - - - + + Energy calculation: + 768 ============= * CPU0: 400 / 512 * 300 = 234 + * CPU1: 100 / 512 * 300 = 58 + * CPU2: 600 / 768 * 800 = 625 + 512 =========== - ##- - - - - * CPU3: 500 / 768 * 800 = 520 + ## ## => total_energy = 1437 + 341 -PP - - - - ## ## + PP ## ## + 170 -## - - - - ## ## + ## ## ## ## + ------------ ------------- + CPU0 CPU1 CPU2 CPU3 + + 从这些计算结果来看,情况1的总能量最低。所以从节约能量的角度看,CPU1是最佳候选 + 者。 + +大核通常比小核更耗电,因此主要在任务不适合在小核运行时使用。然而,小核并不总是比 +大核节能。举例来说,对于某些系统,小核的高OPP可能比大核的低OPP能量消耗更高。因此, +如果小核在某一特定时间点刚好有足够的利用率,在此刻被唤醒的小任务放在大核执行可能 +会更节能,尽管它在小核上运行也是合适的。 + +即使在大核所有OPP都不如小核OPP节能的情况下,在某些特定条件下,令小任务运行在大核 +上依然可能节能。事实上,将一个任务放在一个小核上可能导致整个性能域的OPP提高,这将 +增加已经在该性能域运行的任务的能量成本。如果唤醒的任务被放在一个大核上,它的执行 +成本可能比放在小核上更高,但这不会影响小核上的其它任务,这些任务将继续以较低的OPP +运行。因此,当考虑CPU消耗的总能量时,在大核上运行一个任务的额外成本可能小于为所有 +其它运行在小核的任务提高OPP的成本。 + +上面的例子几乎不可能以一种通用的方式得到正确的结果;同时,对于所有平台,在不知道 +系统所有CPU每个不同OPP的运行成本时,也无法得到正确的结果。得益于基于EM的设计, +EAS应该能够正确处理这些问题而不会引发太多麻烦。然而,为了确保对高利用率场景的 +吞吐率造成的影响最小化,EAS同样实现了另外一种叫“过度利用率”的机制。 + + +5. 过度利用率 +------------- + +从一般的角度来看,EAS能提供最大帮助的是那些涉及低、中CPU利用率的使用场景。每当CPU +密集型的长任务运行,它们将需要所有的可用CPU算力,调度器将没有什么办法来节省能量同时 +又不损害吞吐率。为了避免EAS损害性能,一旦CPU被使用的算力超过80%,它将被标记为“过度 +利用”。只要根域中没有CPU是过度利用状态,负载均衡被禁用,而EAS将覆盖唤醒平衡代码。 +EAS很可能将负载放置在系统中能量效率最高的CPU而不是其它CPU上,只要不损害吞吐率。 +因此,负载均衡器被禁用以防止它打破EAS发现的节能任务放置。当系统未处于过度利用状态时, +这样做是安全的,因为低于80%的临界点意味着: + + a. 所有的CPU都有一些空闲时间,所以EAS使用的利用率信号很可能准确地代表各种任务 + 的“大小”。 + b. 所有任务,不管它们的nice值是多大,都应该被提供了足够多的CPU算力。 + c. 
既然有多余的算力,那么所有的任务都必须定期阻塞/休眠,在唤醒时进行平衡就足够 + 了。 + +只要一个CPU利用率超过80%的临界点,上述三个假设中至少有一个是不正确的。在这种情况下, +整个根域的“过度利用”标志被设置,EAS被禁用,负载均衡器被重新启用。通过这样做,调度器 +又回到了在CPU密集的条件下基于负载的算法做负载均衡。这更好地尊重了任务的nice值。 + +由于过度利用率的概念在很大程度上依赖于检测系统中是否有一些空闲时间,所以必须考虑 +(比CFS)更高优先级的调度类(以及中断)“窃取”的CPU算力。像这样,对过度使用率的检测 +不仅要考虑CFS任务使用的算力,还需要考虑其它调度类和中断。 + + +6. EAS的依赖和要求 +------------------ + +能量感知调度依赖系统的CPU具有特定的硬件属性,以及内核中的其它特性被启用。本节列出 +了这些依赖,并对如何满足这些依赖提供了提示。 + + +6.1 - 非对称CPU拓扑 +^^^^^^^^^^^^^^^^^^^ + + +如简介所提,目前只有非对称CPU拓扑结构的平台支持EAS。通过在运行时查询 +SD_ASYM_CPUCAPACITY_FULL标志位是否在创建调度域时已设置来检查这一要求是否满足。 + +参阅Documentation/scheduler/sched-capacity.rst以了解在sched_domain层次结构 +中设置此标志位所需满足的要求。 + +请注意,EAS并非从根本上与SMP不兼容,但在SMP平台上还没有观察到明显的节能。这一 +限制可以在将来进行修改,如果被证明不是这样的话。 + + +6.2 - 当前的能量模型 +^^^^^^^^^^^^^^^^^^^^ + +EAS使用一个平台的EM来估算调度决策对能量的影响。因此,你的平台必须向EM框架提供 +能量成本表,以启动EAS。要做到这一点,请参阅文档 +Documentation/power/energy-model.rst中的独立EM框架部分。 + +另请注意,调度域需要在EM注册后重建,以便启动EAS。 + +EAS使用EM对能量使用率进行预测决策,因此它在检查任务放置的可能选项时更加注重 +差异。对于EAS来说,EM的功率值是以毫瓦还是以“抽象刻度”为单位表示并不重要。 + + + +6.3 - 能量模型复杂度 +^^^^^^^^^^^^^^^^^^^^ + +任务唤醒路径是时延敏感的。当一个平台的EM太复杂(太多CPU,太多性能域,太多状态 +等),在唤醒路径中使用它的成本就会升高到不可接受。能量感知唤醒算法的复杂度为: + + C = Nd * (Nc + Ns) + +其中:Nd是性能域的数量;Nc是CPU的数量;Ns是OPP的总数(例如:对于两个性能域, +每个域有4个OPP,则Ns = 8)。 + +当调度域建立时,复杂性检查是在根域上进行的。如果一个根域的复杂度C恰好高于完全 +主观设定的EM_MAX_COMPLEXITY阈值(在本文写作时,是2048),则EAS不会在此根域 +启动。 + +如果你的平台的能量模型的复杂度太高,EAS无法在这个根域上使用,但你真的想用, +那么你就只剩下两个可能的选择: + + 1. 将你的系统拆分成分离的、较小的、使用独占cpuset的根域,并在每个根域局部 + 启用EAS。这个方案的好处是开箱即用,但缺点是无法在根域之间实现负载均衡, + 这可能会导致总体系统负载不均衡。 + 2. 
提交补丁以降低EAS唤醒算法的复杂度,从而使其能够在合理的时间内处理更大 + 的EM。 + + +6.4 - Schedutil监管器 +^^^^^^^^^^^^^^^^^^^^^ + +EAS试图预测CPU在不久的将来会在哪个OPP下运行,以估算它们的能量消耗。为了做到 +这一点,它假定CPU的OPP跟随CPU利用率变化而变化。 + +尽管在实践中很难对这一假设的准确性提供硬性保证(因为,举例来说硬件可能不会做 +它被要求做的事情),相对于其他CPUFreq监管器,schedutil至少_请求_使用利用率 +信号计算的频率。因此,与EAS一起使用的唯一合理的监管器是schedutil,因为它是 +唯一一个在频率请求和能量预测之间提供某种程度的一致性的监管器。 + +不支持将EAS与schedutil之外的任何其它监管器一起使用。 + + +6.5 刻度不变性使用率信号 +^^^^^^^^^^^^^^^^^^^^^^^^ + +为了对不同的CPU和所有的性能状态做出准确的预测,EAS需要频率不变的和CPU不变的 +PELT信号。这些信号可以通过每个体系结构定义的arch_scale{cpu,freq}_capacity() +回调函数获取。 + +不支持在没有实现这两个回调函数的平台上使用EAS。 + + +6.6 多线程(SMT) +^^^^^^^^^^^^^^^^^ + +当前实现的EAS是不感知SMT的,因此无法利用多线程硬件节约能量。EAS认为线程是独立的 +CPU,这实际上对性能和能量消耗都是不利的。 + +不支持在SMT上使用EAS。 -- GitLab From 4fbe7b19a9485db0a53efc018fe88db25846c89f Mon Sep 17 00:00:00 2001 From: Ethan Dye Date: Mon, 7 Feb 2022 16:54:42 -0700 Subject: [PATCH 0445/1586] docs: Fix wording in optional zram feature docs This fixes some simple grammar errors in the documentation for zram, specifically errors in the optional feature section of the zram documentation. Signed-off-by: Ethan Dye Link: https://lore.kernel.org/r/20220207235442.95090-1-mrtops03@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/blockdev/zram.rst | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst index 3e11926a4df95..54fe63745ed8e 100644 --- a/Documentation/admin-guide/blockdev/zram.rst +++ b/Documentation/admin-guide/blockdev/zram.rst @@ -315,8 +315,8 @@ To use the feature, admin should set up backing device via:: echo /dev/sda5 > /sys/block/zramX/backing_dev -before disksize setting. It supports only partition at this moment. -If admin wants to use incompressible page writeback, they could do via:: +before disksize setting. It supports only partitions at this moment. 
+If admin wants to use incompressible page writeback, they could do it via:: echo huge > /sys/block/zramX/writeback @@ -341,9 +341,9 @@ Admin can request writeback of those idle pages at right timing via:: echo idle > /sys/block/zramX/writeback -With the command, zram writeback idle pages from memory to the storage. +With the command, zram will writeback idle pages from memory to the storage. -If admin want to write a specific page in zram device to backing device, +If an admin wants to write a specific page in zram device to the backing device, they could write a page index into the interface. echo "page_index=1251" > /sys/block/zramX/writeback @@ -354,7 +354,7 @@ to guarantee storage health for entire product life. To overcome the concern, zram supports "writeback_limit" feature. The "writeback_limit_enable"'s default value is 0 so that it doesn't limit -any writeback. IOW, if admin wants to apply writeback budget, he should +any writeback. IOW, if admin wants to apply writeback budget, they should enable writeback_limit_enable via:: $ echo 1 > /sys/block/zramX/writeback_limit_enable @@ -365,7 +365,7 @@ until admin sets the budget via /sys/block/zramX/writeback_limit. (If admin doesn't enable writeback_limit_enable, writeback_limit's value assigned via /sys/block/zramX/writeback_limit is meaningless.) 
-If admin want to limit writeback as per-day 400M, he could do it +If admin wants to limit writeback as per-day 400M, they could do it like below:: $ MB_SHIFT=20 @@ -375,16 +375,16 @@ like below:: $ echo 1 > /sys/block/zram0/writeback_limit_enable If admins want to allow further write again once the budget is exhausted, -he could do it like below:: +they could do it like below:: $ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \ /sys/block/zram0/writeback_limit -If admin wants to see remaining writeback budget since last set:: +If an admin wants to see the remaining writeback budget since last set:: $ cat /sys/block/zramX/writeback_limit -If admin want to disable writeback limit, he could do:: +If an admin wants to disable writeback limit, they could do:: $ echo 0 > /sys/block/zramX/writeback_limit_enable @@ -393,7 +393,7 @@ system reboot, echo 1 > /sys/block/zramX/reset) so keeping how many of writeback happened until you reset the zram to allocate extra writeback budget in next setting is user's job. -If admin wants to measure writeback count in a certain period, he could +If admin wants to measure writeback count in a certain period, they could know it via /sys/block/zram0/bd_stat's 3rd column. memory tracking -- GitLab From 8716ef413aa55d9a25481f84eb54f9d571a1f421 Mon Sep 17 00:00:00 2001 From: Akira Yokosawa Date: Tue, 1 Feb 2022 08:59:58 +0900 Subject: [PATCH 0446/1586] docs: pdfdocs: Tweak width params of TOC Sphinx has its own set of width parameters of Table of Contents (TOC) for LaTeX defined in its class definition of sphinxmanual.cls. It also inherits parameters for chapter entries from report.cls of original LaTeX base. However, they are optimized assuming small documents with tens of pages and chapters/sections of less than 10. To cope with some of kernel-doc documents with more than 1000 pages and several tens of chapters/sections, definitions of those parameters need to be adjusted. 
Unfortunately, those parameters are hard coded in the class definitions and need low-level LaTeX coding tricks to redefine. As Sphinx 1.7.9 does not have \sphinxtableofcontentshook, which defines those parameters in later Sphinx versions, for compatibility with both pre-1.8 and later Sphinx versions, empty the hook altogether and redefine \@pnumwidth, \l@chapter, \l@section, and \l@subsection commands originally defined in report.cls. Summary of parameter changes: Width of page number (\@pnumwidth): 1.55em -> 2.7em Width of chapter number: 1.5em -> 1.8em Indent of section number: 1.5em -> 1.8em Width of section number: 2.6em -> 3.2em Indent of subsection number: 4.1em -> 5em Width of subsection number: 3.5em -> 4.3em Notes: 1. Parameters for subsection become relevant only when ":maxdepth: 3" is specified under "toctree::" (e.g., RCU/index.rst). They can hold subsection numbers up to 5 digits such as "18.7.13" (in RCU.pdf). 2. Number of chapters in driver-api.pdf is getting closer to 100. When it reaches 100, another set of tweaks will be necessary. 3. The low-level LaTeX trick is mentioned in "Unofficial LaTeX2e reference manual" at: http://latexref.xyz/Table-of-contents-etc_002e.html Signed-off-by: Akira Yokosawa Cc: Jonathan Corbet Link: https://lore.kernel.org/r/e52b4718-7909-25be-fbc1-76800aa62ae3@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/conf.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/Documentation/conf.py b/Documentation/conf.py index f07f2e9b9f2c8..e5c13dee2de80 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -409,6 +409,37 @@ latex_elements = { # Additional stuff for the LaTeX preamble. 
'preamble': ''' + % Custom width parameters for TOC --- Redefine low-level commands + % defined in report.cls + \\makeatletter + %% Redefine \\@pnumwidth (page number width) + \\renewcommand*\\@pnumwidth{2.7em} + %% Redefine \\l@chapter (chapter list entry) + \\renewcommand*\\l@chapter[2]{% + \\ifnum \\c@tocdepth >\\m@ne + \\addpenalty{-\\@highpenalty}% + \\vskip 1.0em \\@plus\\p@ + \\setlength\\@tempdima{1.8em}% + \\begingroup + \\parindent \\z@ \\rightskip \\@pnumwidth + \\parfillskip -\\@pnumwidth + \\leavevmode \\bfseries + \\advance\\leftskip\\@tempdima + \\hskip -\\leftskip + #1\\nobreak\\hfil + \\nobreak\\hb@xt@\\@pnumwidth{\\hss #2% + \\kern-\\p@\\kern\\p@}\\par + \\penalty\\@highpenalty + \\endgroup + \\fi} + %% Redefine \\l@section and \\l@subsection + \\renewcommand*\\l@section{\\@dottedtocline{1}{1.8em}{3.2em}} + \\renewcommand*\\l@subsection{\\@dottedtocline{2}{5em}{4.3em}} + \\makeatother + %% Sphinx < 1.8 doesn't have \\sphinxtableofcontentshook + \\providecommand{\\sphinxtableofcontentshook}{} + %% Undefine it for compatibility with Sphinx 1.7.9 + \\renewcommand{\\sphinxtableofcontentshook}{} % Empty the hook % Prevent column squeezing of tabulary. \\setlength{\\tymin}{20em} % Use some font with UTF-8 support with XeLaTeX -- GitLab From 66939df53948bbf66753100266076f6a3357b54c Mon Sep 17 00:00:00 2001 From: Akira Yokosawa Date: Tue, 1 Feb 2022 09:02:14 +0900 Subject: [PATCH 0447/1586] docs: pdfdocs: Switch default CJK font to KR variants xeCJK is enabled in Table of Contents (TOC) so that translations.pdf built by top-level "make pdfdocs" can have its TOC typeset properly. This causes quotation marks and apostrophe symbols appear too wide in Latin-script docs. This is because (1) Sphinx converts ASCII symbols into multi-byte UTF-8 ones in LaTeX and (2) in the SC variant of "Noto CJK" font families, those UTF-8 symbols have full-width glyph. 
The KR variant of the font families has half-width glyph for those symbols and TOC pages should look nicer when it is used instead. Switch the default CJK font families to the KR variant and teach xeCJK of those symbols' widths. To compensate the switch, teach xeCJK of the width in the SC and TC variants. Signed-off-by: Akira Yokosawa Cc: Jonathan Corbet Link: https://lore.kernel.org/r/0c8ea878-0a6f-ea01-ab45-4e66c5facee9@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/conf.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/Documentation/conf.py b/Documentation/conf.py index e5c13dee2de80..e70aa5fd969f3 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -460,13 +460,15 @@ latex_elements['preamble'] += ''' \\IfFontExistsTF{Noto Sans CJK SC}{ % This is needed for translations \\usepackage{xeCJK} - \\IfFontExistsTF{Noto Serif CJK SC}{ - \\setCJKmainfont{Noto Serif CJK SC}[AutoFakeSlant] + \\IfFontExistsTF{Noto Serif CJK KR}{ + \\setCJKmainfont{Noto Serif CJK KR}[AutoFakeSlant] }{ - \\setCJKmainfont{Noto Sans CJK SC}[AutoFakeSlant] + \\setCJKmainfont{Noto Sans CJK KR}[AutoFakeSlant] } - \\setCJKsansfont{Noto Sans CJK SC}[AutoFakeSlant] - \\setCJKmonofont{Noto Sans Mono CJK SC}[AutoFakeSlant] + \\setCJKsansfont{Noto Sans CJK KR}[AutoFakeSlant] + \\setCJKmonofont{Noto Sans Mono CJK KR}[AutoFakeSlant] + \\xeCJKDeclareCharClass{HalfLeft}{`“,`‘} + \\xeCJKDeclareCharClass{HalfRight}{`”,`’} % CJK Language-specific font choices \\IfFontExistsTF{Noto Serif CJK SC}{ \\newCJKfontfamily[SCmain]\\scmain{Noto Serif CJK SC}[AutoFakeSlant] @@ -513,11 +515,18 @@ latex_elements['preamble'] += ''' \\newcommand{\\kerneldocBeginSC}{% \\begingroup% \\scmain% + \\xeCJKDeclareCharClass{FullLeft}{`“,`‘}% + \\xeCJKDeclareCharClass{FullRight}{`”,`’}% + \\renewcommand{\\CJKrmdefault}{SCserif}% + \\renewcommand{\\CJKsfdefault}{SCsans}% + \\renewcommand{\\CJKttdefault}{SCmono}% } \\newcommand{\\kerneldocEndSC}{\\endgroup} 
\\newcommand{\\kerneldocBeginTC}{% \\begingroup% \\tcmain% + \\xeCJKDeclareCharClass{FullLeft}{`“,`‘}% + \\xeCJKDeclareCharClass{FullRight}{`”,`’}% \\renewcommand{\\CJKrmdefault}{TCserif}% \\renewcommand{\\CJKsfdefault}{TCsans}% \\renewcommand{\\CJKttdefault}{TCmono}% @@ -525,8 +534,6 @@ latex_elements['preamble'] += ''' \\newcommand{\\kerneldocEndTC}{\\endgroup} \\newcommand{\\kerneldocBeginKR}{% \\begingroup% - \\xeCJKDeclareCharClass{HalfLeft}{`“,`‘}% - \\xeCJKDeclareCharClass{HalfRight}{`”,`’}% \\krmain% \\renewcommand{\\CJKrmdefault}{KRserif}% \\renewcommand{\\CJKsfdefault}{KRsans}% @@ -536,8 +543,6 @@ latex_elements['preamble'] += ''' \\newcommand{\\kerneldocEndKR}{\\endgroup} \\newcommand{\\kerneldocBeginJP}{% \\begingroup% - \\xeCJKDeclareCharClass{HalfLeft}{`“,`‘}% - \\xeCJKDeclareCharClass{HalfRight}{`”,`’}% \\jpmain% \\renewcommand{\\CJKrmdefault}{JPserif}% \\renewcommand{\\CJKsfdefault}{JPsans}% -- GitLab From 7b686a2ea1e41e75c35ff2ec333d68b2b8c032d6 Mon Sep 17 00:00:00 2001 From: Akira Yokosawa Date: Tue, 1 Feb 2022 09:03:16 +0900 Subject: [PATCH 0448/1586] docs: pdfdocs: Enable CJKspace in TOC for Korean titles Korean (Hangul) titles in Table of Contents of translations.pdf don't have inter-phrase spaces. This is because the CJKspace option of xeCJK is disabled by default. Restore the spaces by enabling the option at the beginning of every document and disable it in the \kerneldocBegin{SC|TC|JP} commands. 
Signed-off-by: Akira Yokosawa Cc: Jonathan Corbet Link: https://lore.kernel.org/r/19141b3e-01d9-1f6d-5020-42fbda784831@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/conf.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Documentation/conf.py b/Documentation/conf.py index e70aa5fd969f3..ded49b8e9bf63 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -520,6 +520,7 @@ latex_elements['preamble'] += ''' \\renewcommand{\\CJKrmdefault}{SCserif}% \\renewcommand{\\CJKsfdefault}{SCsans}% \\renewcommand{\\CJKttdefault}{SCmono}% + \\xeCJKsetup{CJKspace = false}% } \\newcommand{\\kerneldocEndSC}{\\endgroup} \\newcommand{\\kerneldocBeginTC}{% @@ -530,6 +531,7 @@ latex_elements['preamble'] += ''' \\renewcommand{\\CJKrmdefault}{TCserif}% \\renewcommand{\\CJKsfdefault}{TCsans}% \\renewcommand{\\CJKttdefault}{TCmono}% + \\xeCJKsetup{CJKspace = false}% } \\newcommand{\\kerneldocEndTC}{\\endgroup} \\newcommand{\\kerneldocBeginKR}{% @@ -538,7 +540,7 @@ latex_elements['preamble'] += ''' \\renewcommand{\\CJKrmdefault}{KRserif}% \\renewcommand{\\CJKsfdefault}{KRsans}% \\renewcommand{\\CJKttdefault}{KRmono}% - \\xeCJKsetup{CJKspace = true} % For inter-phrase space + % \\xeCJKsetup{CJKspace = true} % true by default } \\newcommand{\\kerneldocEndKR}{\\endgroup} \\newcommand{\\kerneldocBeginJP}{% @@ -547,6 +549,7 @@ latex_elements['preamble'] += ''' \\renewcommand{\\CJKrmdefault}{JPserif}% \\renewcommand{\\CJKsfdefault}{JPsans}% \\renewcommand{\\CJKttdefault}{JPmono}% + \\xeCJKsetup{CJKspace = false}% } \\newcommand{\\kerneldocEndJP}{\\endgroup} % Single spacing in literal blocks @@ -555,6 +558,7 @@ latex_elements['preamble'] += ''' \\usepackage{etoolbox} % Inactivate CJK after tableofcontents \\apptocmd{\\sphinxtableofcontents}{\\kerneldocCJKoff}{}{} + \\xeCJKsetup{CJKspace = true} % For inter-phrase space of Korean TOC }{ % No CJK font found % Custom macros to on/off CJK (Dummy) \\newcommand{\\kerneldocCJKon}{} -- GitLab From 
5d9158e3c762f0bf1753501d8e64eb6fe19dc437 Mon Sep 17 00:00:00 2001 From: Akira Yokosawa Date: Tue, 1 Feb 2022 09:04:40 +0900 Subject: [PATCH 0449/1586] docs/translations: Skip CJK contents if suitable fonts not found On systems without "Noto Sans CJK" fonts, CJK chapters in translations.pdf are full of "TOFU" boxes, with a long build time and a large log file containing lots of missing-font warnings. Avoid such waste of time and resources by skipping CJK chapters when CJK fonts are not available. To skip whole chapters, change the definition of \kerneldocBegin{SC|TC|KR|JP} commands so that they can have an argument to be ignored. This works as far as the argument (#1) is not used in the command. In place of skipped contents, put a note on skipped contents at the beginning of the PDF. Change the call sites in index.rst of CJK translations accordingly. When CJK fonts are available, existing command definitions with no argument just work. LaTeX engine will see additional pairs of "{" and "}", which add a level of grouping without having any effect on typesetting. 
Signed-off-by: Akira Yokosawa Cc: Jonathan Corbet Link: https://lore.kernel.org/r/3359ca41-b81d-b2c7-e437-7618efbe241d@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/conf.py | 13 +++++++++---- Documentation/translations/ja_JP/index.rst | 4 ++-- Documentation/translations/ko_KR/index.rst | 5 ++--- Documentation/translations/zh_CN/index.rst | 4 ++-- Documentation/translations/zh_TW/index.rst | 4 ++-- 5 files changed, 17 insertions(+), 13 deletions(-) diff --git a/Documentation/conf.py b/Documentation/conf.py index ded49b8e9bf63..62cd0e472b3b3 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -563,13 +563,18 @@ latex_elements['preamble'] += ''' % Custom macros to on/off CJK (Dummy) \\newcommand{\\kerneldocCJKon}{} \\newcommand{\\kerneldocCJKoff}{} - \\newcommand{\\kerneldocBeginSC}{} + \\newcommand{\\kerneldocBeginSC}[1]{% + \\begin{sphinxadmonition}{note}{Note:} + ``Noto Sans CJK'' fonts are not found while building this PDF\\@. + Translations of zh\\_CN, zh\\_TW, ko\\_KR, and ja\\_JP are + skipped. + \\end{sphinxadmonition}} \\newcommand{\\kerneldocEndSC}{} - \\newcommand{\\kerneldocBeginTC}{} + \\newcommand{\\kerneldocBeginTC}[1]{} \\newcommand{\\kerneldocEndTC}{} - \\newcommand{\\kerneldocBeginKR}{} + \\newcommand{\\kerneldocBeginKR}[1]{} \\newcommand{\\kerneldocEndKR}{} - \\newcommand{\\kerneldocBeginJP}{} + \\newcommand{\\kerneldocBeginJP}[1]{} \\newcommand{\\kerneldocEndJP}{} } ''' diff --git a/Documentation/translations/ja_JP/index.rst b/Documentation/translations/ja_JP/index.rst index 88d4d98eed15a..20738c931d025 100644 --- a/Documentation/translations/ja_JP/index.rst +++ b/Documentation/translations/ja_JP/index.rst @@ -3,7 +3,7 @@ \renewcommand\thesection* \renewcommand\thesubsection* \kerneldocCJKon - \kerneldocBeginJP + \kerneldocBeginJP{ Japanese translations ===================== @@ -15,4 +15,4 @@ Japanese translations .. 
raw:: latex - \kerneldocEndJP + }\kerneldocEndJP diff --git a/Documentation/translations/ko_KR/index.rst b/Documentation/translations/ko_KR/index.rst index f636b482fb4c1..4add6b2fe1f21 100644 --- a/Documentation/translations/ko_KR/index.rst +++ b/Documentation/translations/ko_KR/index.rst @@ -3,7 +3,7 @@ \renewcommand\thesection* \renewcommand\thesubsection* \kerneldocCJKon - \kerneldocBeginKR + \kerneldocBeginKR{ 한국어 번역 =========== @@ -26,5 +26,4 @@ .. raw:: latex - \normalsize - \kerneldocEndKR + }\kerneldocEndKR diff --git a/Documentation/translations/zh_CN/index.rst b/Documentation/translations/zh_CN/index.rst index f831887eacfb1..23f8f4c68e83c 100644 --- a/Documentation/translations/zh_CN/index.rst +++ b/Documentation/translations/zh_CN/index.rst @@ -5,7 +5,7 @@ \renewcommand\thesection* \renewcommand\thesubsection* \kerneldocCJKon - \kerneldocBeginSC + \kerneldocBeginSC{ .. _linux_doc_zh: @@ -198,4 +198,4 @@ TODOList: .. raw:: latex - \kerneldocEndSC + }\kerneldocEndSC diff --git a/Documentation/translations/zh_TW/index.rst b/Documentation/translations/zh_TW/index.rst index f56f78ba78609..e1ce9d8c06f8f 100644 --- a/Documentation/translations/zh_TW/index.rst +++ b/Documentation/translations/zh_TW/index.rst @@ -5,7 +5,7 @@ \renewcommand\thesection* \renewcommand\thesubsection* \kerneldocCJKon - \kerneldocBeginTC + \kerneldocBeginTC{ .. _linux_doc_zh_tw: @@ -174,4 +174,4 @@ TODOList: .. raw:: latex - \kerneldocEndTC + }\kerneldocEndTC -- GitLab From b774cc46313b3d7c9139f29df67818a8b858c558 Mon Sep 17 00:00:00 2001 From: Akira Yokosawa Date: Tue, 1 Feb 2022 09:05:40 +0900 Subject: [PATCH 0450/1586] docs: pdfdocs: Move CJK monospace font setting to main conf.py As LaTeX macros for CJK font settings can have Latin-script font settings as well, settings under Documentation/translations/ can be moved to the main conf.py. By this change, translations.pdf built by top-level "make pdfdocs" can have properly aligned ascii-art diagrams except for Korean ones. 
For the reason of remaining misalignment in Korean diagrams, see changelog of commit a90dad8f610a ("docs: pdfdocs: Add conf.py local to translations for ascii-art alignment"). Signed-off-by: Akira Yokosawa Cc: Jonathan Corbet Link: https://lore.kernel.org/r/eb87790a-03f4-9f29-c8a3-ef2c3e78ca18@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/conf.py | 8 ++++++++ Documentation/translations/conf.py | 12 ------------ 2 files changed, 8 insertions(+), 12 deletions(-) delete mode 100644 Documentation/translations/conf.py diff --git a/Documentation/conf.py b/Documentation/conf.py index 62cd0e472b3b3..fb8f69fc4d38c 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -521,6 +521,8 @@ latex_elements['preamble'] += ''' \\renewcommand{\\CJKsfdefault}{SCsans}% \\renewcommand{\\CJKttdefault}{SCmono}% \\xeCJKsetup{CJKspace = false}% + % For CJK ascii-art alignment + \\setmonofont{Noto Sans Mono CJK SC}[AutoFakeSlant]% } \\newcommand{\\kerneldocEndSC}{\\endgroup} \\newcommand{\\kerneldocBeginTC}{% @@ -532,6 +534,8 @@ latex_elements['preamble'] += ''' \\renewcommand{\\CJKsfdefault}{TCsans}% \\renewcommand{\\CJKttdefault}{TCmono}% \\xeCJKsetup{CJKspace = false}% + % For CJK ascii-art alignment + \\setmonofont{Noto Sans Mono CJK TC}[AutoFakeSlant]% } \\newcommand{\\kerneldocEndTC}{\\endgroup} \\newcommand{\\kerneldocBeginKR}{% @@ -541,6 +545,8 @@ latex_elements['preamble'] += ''' \\renewcommand{\\CJKsfdefault}{KRsans}% \\renewcommand{\\CJKttdefault}{KRmono}% % \\xeCJKsetup{CJKspace = true} % true by default + % For CJK ascii-art alignment (still misaligned for Hangul) + \\setmonofont{Noto Sans Mono CJK KR}[AutoFakeSlant]% } \\newcommand{\\kerneldocEndKR}{\\endgroup} \\newcommand{\\kerneldocBeginJP}{% @@ -550,6 +556,8 @@ latex_elements['preamble'] += ''' \\renewcommand{\\CJKsfdefault}{JPsans}% \\renewcommand{\\CJKttdefault}{JPmono}% \\xeCJKsetup{CJKspace = false}% + % For CJK ascii-art alignment + \\setmonofont{Noto Sans Mono CJK JP}[AutoFakeSlant]% } 
\\newcommand{\\kerneldocEndJP}{\\endgroup} % Single spacing in literal blocks diff --git a/Documentation/translations/conf.py b/Documentation/translations/conf.py deleted file mode 100644 index 92cdbba742299..0000000000000 --- a/Documentation/translations/conf.py +++ /dev/null @@ -1,12 +0,0 @@ -# -*- coding: utf-8 -*- -# SPDX-License-Identifier: GPL-2.0 - -# -- Additinal options for LaTeX output ---------------------------------- -# font config for ascii-art alignment - -latex_elements['preamble'] += ''' - \\IfFontExistsTF{Noto Sans CJK SC}{ - % For CJK ascii-art alignment - \\setmonofont{Noto Sans Mono CJK SC}[AutoFakeSlant] - }{} -''' -- GitLab From 10720e120e2b1d66172dccb06eb4f346a665cca6 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 5 Jan 2022 20:43:04 +0800 Subject: [PATCH 0451/1586] clocksource/drivers/imx-tpm: Exclude sched clock for ARM64 For ARM64 platform such as i.MX8ULP which has ARMv8 generic timer as sched clock, which is much faster compared with tpm sched clock. Reading the tpm count register in i.MX8ULP requires about 290ns, this is slow and introduce scheduler latency. So exclude tpm sched clock for ARM64 platform. 
Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20220105124304.3567629-1-peng.fan@oss.nxp.com Signed-off-by: Daniel Lezcano --- drivers/clocksource/timer-imx-tpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/timer-imx-tpm.c b/drivers/clocksource/timer-imx-tpm.c index df8064122b10c..60cefc247b715 100644 --- a/drivers/clocksource/timer-imx-tpm.c +++ b/drivers/clocksource/timer-imx-tpm.c @@ -150,10 +150,10 @@ static int __init tpm_clocksource_init(void) tpm_delay_timer.read_current_timer = &tpm_read_current_timer; tpm_delay_timer.freq = timer_of_rate(&to_tpm) >> 3; register_current_timer_delay(&tpm_delay_timer); -#endif sched_clock_register(tpm_read_sched_clock, counter_width, timer_of_rate(&to_tpm) >> 3); +#endif return clocksource_mmio_init(timer_base + TPM_CNT, "imx-tpm", -- GitLab From 47b34f495b8b75475952f12c521c4c1fc2fa09b4 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 15 Feb 2022 15:51:39 +0200 Subject: [PATCH 0452/1586] spi: intel-pci: Add support for Intel Ice Lake-N SPI serial flash Intel Ice Lake-N has the same SPI serial flash controller as Ice Lake-LP. Add Ice Lake-N PCI ID to the driver list of supported devices. The device can be found on MacBookPro16,2 [1]. 
[1]: https://linux-hardware.org/?probe=f1c5cf0c43 Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg Link: https://lore.kernel.org/r/20220215135139.4328-1-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-intel-pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-intel-pci.c b/drivers/spi/spi-intel-pci.c index a9cb4d77ffe39..a5ef7a526a7fc 100644 --- a/drivers/spi/spi-intel-pci.c +++ b/drivers/spi/spi-intel-pci.c @@ -66,6 +66,7 @@ static const struct pci_device_id intel_spi_pci_ids[] = { { PCI_VDEVICE(INTEL, 0x19e0), (unsigned long)&bxt_info }, { PCI_VDEVICE(INTEL, 0x1bca), (unsigned long)&bxt_info }, { PCI_VDEVICE(INTEL, 0x34a4), (unsigned long)&bxt_info }, + { PCI_VDEVICE(INTEL, 0x38a4), (unsigned long)&bxt_info }, { PCI_VDEVICE(INTEL, 0x43a4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x4b24), (unsigned long)&bxt_info }, { PCI_VDEVICE(INTEL, 0x4da4), (unsigned long)&bxt_info }, -- GitLab From 2b993ab79b5dc83eb699e747bfac6c04f4f5fc70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Wed, 16 Feb 2022 13:27:19 -0300 Subject: [PATCH 0453/1586] spi: amd: Fix building without ACPI enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 209043554915 ("spi: amd: Add support for version AMDI0062") removed the cast ACPI_PTR() for no good reason. This wrapper is important to make sure that the driver can be compiled with or without CONFIG_ACPI enabled, useful for compiling test. Give back the cast so compilation works again. 
Fixes: 209043554915 ("spi: amd: Add support for version AMDI0062") Signed-off-by: André Almeida Link: https://lore.kernel.org/r/20220216162719.116062-1-andrealmeid@collabora.com Signed-off-by: Mark Brown --- drivers/spi/spi-amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-amd.c b/drivers/spi/spi-amd.c index d909afac6e219..cba6a4486c24c 100644 --- a/drivers/spi/spi-amd.c +++ b/drivers/spi/spi-amd.c @@ -330,7 +330,7 @@ MODULE_DEVICE_TABLE(acpi, spi_acpi_match); static struct platform_driver amd_spi_driver = { .driver = { .name = "amd_spi", - .acpi_match_table = spi_acpi_match, + .acpi_match_table = ACPI_PTR(spi_acpi_match), }, .probe = amd_spi_probe, }; -- GitLab From 54d0fd06e2bd52d3b17648de787157a7c0625adb Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Wed, 16 Feb 2022 11:13:17 +0200 Subject: [PATCH 0454/1586] spi: pxa2xx: Add support for Intel Raptor Lake PCH-S Add support for LPSS SPI on Intel Raptor Lake PCH-S. It has four controllers each having two chip selects. 
Signed-off-by: Jarkko Nikula Link: https://lore.kernel.org/r/20220216091317.1302254-1-jarkko.nikula@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index abb9f0ffd3773..edb42d08857d0 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -1394,6 +1394,11 @@ static const struct pci_device_id pxa2xx_spi_pci_compound_match[] = { { PCI_VDEVICE(INTEL, 0x5ac2), LPSS_BXT_SSP }, { PCI_VDEVICE(INTEL, 0x5ac4), LPSS_BXT_SSP }, { PCI_VDEVICE(INTEL, 0x5ac6), LPSS_BXT_SSP }, + /* RPL-S */ + { PCI_VDEVICE(INTEL, 0x7a2a), LPSS_CNL_SSP }, + { PCI_VDEVICE(INTEL, 0x7a2b), LPSS_CNL_SSP }, + { PCI_VDEVICE(INTEL, 0x7a79), LPSS_CNL_SSP }, + { PCI_VDEVICE(INTEL, 0x7a7b), LPSS_CNL_SSP }, /* ADL-S */ { PCI_VDEVICE(INTEL, 0x7aaa), LPSS_CNL_SSP }, { PCI_VDEVICE(INTEL, 0x7aab), LPSS_CNL_SSP }, -- GitLab From 08f253ec3767bcfafc5d32617a92cee57c63968e Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 15 Feb 2022 21:44:46 -0600 Subject: [PATCH 0455/1586] x86/cpu: Clear SME feature flag when not in use Currently, the SME CPU feature flag is reflective of whether the CPU supports the feature but not whether it has been activated by the kernel. Change this around to clear the SME feature flag if the kernel is not using it so userspace can determine if it is available and in use from /proc/cpuinfo. As the feature flag is cleared on systems where SME isn't active, use CPUID 0x8000001f to confirm SME availability before calling native_wbinvd(). 
Signed-off-by: Mario Limonciello Signed-off-by: Borislav Petkov Acked-by: Tom Lendacky Link: https://lore.kernel.org/r/20220216034446.2430634-1-mario.limonciello@amd.com --- arch/x86/kernel/cpu/amd.c | 5 +++++ arch/x86/kernel/process.c | 5 ++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index bad0fa4c17799..0c0b09796ced3 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -556,6 +556,8 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c) * the SME physical address space reduction value. * If BIOS has not enabled SME then don't advertise the * SME feature (set in scattered.c). + * If the kernel has not enabled SME via any means then + * don't advertise the SME feature. * For SEV: If BIOS has not enabled SEV then don't advertise the * SEV and SEV_ES feature (set in scattered.c). * @@ -578,6 +580,9 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c) if (IS_ENABLED(CONFIG_X86_32)) goto clear_all; + if (!sme_me_mask) + setup_clear_cpu_cap(X86_FEATURE_SME); + rdmsrl(MSR_K7_HWCR, msr); if (!(msr & MSR_K7_HWCR_SMMLOCK)) goto clear_sev; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 81d8ef036637c..e131d71b3cae9 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -765,8 +765,11 @@ void stop_this_cpu(void *dummy) * without the encryption bit, they don't race each other when flushed * and potentially end up with the wrong entry being committed to * memory. + * + * Test the CPUID bit directly because the machine might've cleared + * X86_FEATURE_SME due to cmdline options. 
*/ - if (boot_cpu_has(X86_FEATURE_SME)) + if (cpuid_eax(0x8000001f) & BIT(0)) native_wbinvd(); for (;;) { /* -- GitLab From 3f51aa9e296fe4af785d5761bb12556fb2494761 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Wed, 9 Feb 2022 19:29:51 +0800 Subject: [PATCH 0456/1586] PM: hibernate: fix load_image_and_restore() error path As 'swsusp_check' opens 'hib_resume_bdev', if the call to 'create_basic_memory_bitmaps' fails, we need to close 'hib_resume_bdev' in the 'load_image_and_restore' function. Signed-off-by: Ye Bin [ rjw: Subject ] Signed-off-by: Rafael J. Wysocki --- kernel/power/hibernate.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index e6af502c2fd77..49d1df0218cb8 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -689,8 +689,10 @@ static int load_image_and_restore(void) lock_device_hotplug(); error = create_basic_memory_bitmaps(); - if (error) + if (error) { + swsusp_close(FMODE_READ | FMODE_EXCL); goto Unlock; + } error = swsusp_read(&flags); swsusp_close(FMODE_READ | FMODE_EXCL); -- GitLab From e1be43d9b5d0d1310dbd90185a8e5c7145dde40f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 18 Sep 2021 15:17:53 -0700 Subject: [PATCH 0457/1586] overflow: Implement size_t saturating arithmetic helpers In order to perform more open-coded replacements of common allocation size arithmetic, the kernel needs saturating (SIZE_MAX) helpers for multiplication, addition, and subtraction. For example, it is common in allocators, especially on realloc, to add to an existing size: p = krealloc(map->patch, sizeof(struct reg_sequence) * (map->patch_regs + num_regs), GFP_KERNEL); There is no existing saturating replacement for this calculation, and just leaving the addition open coded inside array_size() could potentially overflow as well.
For example, an overflow in an expression for a size_t argument might wrap to zero: array_size(anything, something_at_size_max + 1) == 0 Introduce size_mul(), size_add(), and size_sub() helpers that implicitly promote arguments to size_t and saturated calculations for use in allocations. With these helpers it is also possible to redefine array_size(), array3_size(), flex_array_size(), and struct_size() in terms of the new helpers. As with the check_*_overflow() helpers, the new helpers use __must_check, though what is really desired is a way to make sure that assignment is only to a size_t lvalue. Without this, it's still possible to introduce overflow/underflow via type conversion (i.e. from size_t to int). Enforcing this will currently need to be left to static analysis or future use of -Wconversion. Additionally update the overflow unit tests to force runtime evaluation for the pathological cases. Cc: Rasmus Villemoes Cc: Gustavo A. R. Silva Cc: Nathan Chancellor Cc: Jason Gunthorpe Cc: Nick Desaulniers Cc: Leon Romanovsky Cc: Keith Busch Cc: Len Baker Signed-off-by: Kees Cook --- Documentation/process/deprecated.rst | 20 ++++- include/linux/overflow.h | 110 +++++++++++++++++---------- lib/test_overflow.c | 98 ++++++++++++++++++++++++ 3 files changed, 184 insertions(+), 44 deletions(-) diff --git a/Documentation/process/deprecated.rst b/Documentation/process/deprecated.rst index 388cb19f5dbb6..a6e36d9c3d14b 100644 --- a/Documentation/process/deprecated.rst +++ b/Documentation/process/deprecated.rst @@ -71,6 +71,9 @@ Instead, the 2-factor form of the allocator should be used:: foo = kmalloc_array(count, size, GFP_KERNEL); +Specifically, kmalloc() can be replaced with kmalloc_array(), and +kzalloc() can be replaced with kcalloc(). + If no 2-factor form is available, the saturate-on-overflow helpers should be used:: @@ -91,9 +94,20 @@ Instead, use the helper:: array usage and switch to a `flexible array member <#zero-length-and-one-element-arrays>`_ instead. 
-See array_size(), array3_size(), and struct_size(), -for more details as well as the related check_add_overflow() and -check_mul_overflow() family of functions. +For other calculations, please compose the use of the size_mul(), +size_add(), and size_sub() helpers. For example, in the case of:: + + foo = krealloc(current_size + chunk_size * (count - 3), GFP_KERNEL); + +Instead, use the helpers:: + + foo = krealloc(size_add(current_size, + size_mul(chunk_size, + size_sub(count, 3))), GFP_KERNEL); + +For more details, also see array3_size() and flex_array_size(), +as well as the related check_mul_overflow(), check_add_overflow(), +check_sub_overflow(), and check_shl_overflow() family of functions. simple_strtol(), simple_strtoll(), simple_strtoul(), simple_strtoull() ---------------------------------------------------------------------- diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 4669632bd72bc..59d7228104d02 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -118,81 +118,94 @@ static inline bool __must_check __must_check_overflow(bool overflow) })) /** - * array_size() - Calculate size of 2-dimensional array. - * - * @a: dimension one - * @b: dimension two + * size_mul() - Calculate size_t multiplication with saturation at SIZE_MAX * - * Calculates size of 2-dimensional array: @a * @b. + * @factor1: first factor + * @factor2: second factor * - * Returns: number of bytes needed to represent the array or SIZE_MAX on - * overflow. + * Returns: calculate @factor1 * @factor2, both promoted to size_t, + * with any overflow causing the return value to be SIZE_MAX. The + * lvalue must be size_t to avoid implicit type conversion. 
*/ -static inline __must_check size_t array_size(size_t a, size_t b) +static inline size_t __must_check size_mul(size_t factor1, size_t factor2) { size_t bytes; - if (check_mul_overflow(a, b, &bytes)) + if (check_mul_overflow(factor1, factor2, &bytes)) return SIZE_MAX; return bytes; } /** - * array3_size() - Calculate size of 3-dimensional array. + * size_add() - Calculate size_t addition with saturation at SIZE_MAX * - * @a: dimension one - * @b: dimension two - * @c: dimension three - * - * Calculates size of 3-dimensional array: @a * @b * @c. + * @addend1: first addend + * @addend2: second addend * - * Returns: number of bytes needed to represent the array or SIZE_MAX on - * overflow. + * Returns: calculate @addend1 + @addend2, both promoted to size_t, + * with any overflow causing the return value to be SIZE_MAX. The + * lvalue must be size_t to avoid implicit type conversion. */ -static inline __must_check size_t array3_size(size_t a, size_t b, size_t c) +static inline size_t __must_check size_add(size_t addend1, size_t addend2) { size_t bytes; - if (check_mul_overflow(a, b, &bytes)) - return SIZE_MAX; - if (check_mul_overflow(bytes, c, &bytes)) + if (check_add_overflow(addend1, addend2, &bytes)) return SIZE_MAX; return bytes; } -/* - * Compute a*b+c, returning SIZE_MAX on overflow. Internal helper for - * struct_size() below. +/** + * size_sub() - Calculate size_t subtraction with saturation at SIZE_MAX + * + * @minuend: value to subtract from + * @subtrahend: value to subtract from @minuend + * + * Returns: calculate @minuend - @subtrahend, both promoted to size_t, + * with any overflow causing the return value to be SIZE_MAX. For + * composition with the size_add() and size_mul() helpers, neither + * argument may be SIZE_MAX (or the result with be forced to SIZE_MAX). + * The lvalue must be size_t to avoid implicit type conversion. 
*/ -static inline __must_check size_t __ab_c_size(size_t a, size_t b, size_t c) +static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) { size_t bytes; - if (check_mul_overflow(a, b, &bytes)) - return SIZE_MAX; - if (check_add_overflow(bytes, c, &bytes)) + if (minuend == SIZE_MAX || subtrahend == SIZE_MAX || + check_sub_overflow(minuend, subtrahend, &bytes)) return SIZE_MAX; return bytes; } /** - * struct_size() - Calculate size of structure with trailing array. - * @p: Pointer to the structure. - * @member: Name of the array member. - * @count: Number of elements in the array. + * array_size() - Calculate size of 2-dimensional array. * - * Calculates size of memory needed for structure @p followed by an - * array of @count number of @member elements. + * @a: dimension one + * @b: dimension two * - * Return: number of bytes needed or SIZE_MAX on overflow. + * Calculates size of 2-dimensional array: @a * @b. + * + * Returns: number of bytes needed to represent the array or SIZE_MAX on + * overflow. */ -#define struct_size(p, member, count) \ - __ab_c_size(count, \ - sizeof(*(p)->member) + __must_be_array((p)->member),\ - sizeof(*(p))) +#define array_size(a, b) size_mul(a, b) + +/** + * array3_size() - Calculate size of 3-dimensional array. + * + * @a: dimension one + * @b: dimension two + * @c: dimension three + * + * Calculates size of 3-dimensional array: @a * @b * @c. + * + * Returns: number of bytes needed to represent the array or SIZE_MAX on + * overflow. + */ +#define array3_size(a, b, c) size_mul(size_mul(a, b), c) /** * flex_array_size() - Calculate size of a flexible array member @@ -208,7 +221,22 @@ static inline __must_check size_t __ab_c_size(size_t a, size_t b, size_t c) * Return: number of bytes needed or SIZE_MAX on overflow. 
*/ #define flex_array_size(p, member, count) \ - array_size(count, \ - sizeof(*(p)->member) + __must_be_array((p)->member)) + size_mul(count, \ + sizeof(*(p)->member) + __must_be_array((p)->member)) + +/** + * struct_size() - Calculate size of structure with trailing flexible array. + * + * @p: Pointer to the structure. + * @member: Name of the array member. + * @count: Number of elements in the array. + * + * Calculates size of memory needed for structure @p followed by an + * array of @count number of @member elements. + * + * Return: number of bytes needed or SIZE_MAX on overflow. + */ +#define struct_size(p, member, count) \ + size_add(sizeof(*(p)), flex_array_size(p, member, count)) #endif /* __LINUX_OVERFLOW_H */ diff --git a/lib/test_overflow.c b/lib/test_overflow.c index cea37ae826153..712fb2351c271 100644 --- a/lib/test_overflow.c +++ b/lib/test_overflow.c @@ -594,12 +594,110 @@ static int __init test_overflow_allocation(void) return err; } +struct __test_flex_array { + unsigned long flags; + size_t count; + unsigned long data[]; +}; + +static int __init test_overflow_size_helpers(void) +{ + struct __test_flex_array *obj; + int count = 0; + int err = 0; + int var; + +#define check_one_size_helper(expected, func, args...) 
({ \ + bool __failure = false; \ + size_t _r; \ + \ + _r = func(args); \ + if (_r != (expected)) { \ + pr_warn("expected " #func "(" #args ") " \ + "to return %zu but got %zu instead\n", \ + (size_t)(expected), _r); \ + __failure = true; \ + } \ + count++; \ + __failure; \ +}) + + var = 4; + err |= check_one_size_helper(20, size_mul, var++, 5); + err |= check_one_size_helper(20, size_mul, 4, var++); + err |= check_one_size_helper(0, size_mul, 0, 3); + err |= check_one_size_helper(0, size_mul, 3, 0); + err |= check_one_size_helper(6, size_mul, 2, 3); + err |= check_one_size_helper(SIZE_MAX, size_mul, SIZE_MAX, 1); + err |= check_one_size_helper(SIZE_MAX, size_mul, SIZE_MAX, 3); + err |= check_one_size_helper(SIZE_MAX, size_mul, SIZE_MAX, -3); + + var = 4; + err |= check_one_size_helper(9, size_add, var++, 5); + err |= check_one_size_helper(9, size_add, 4, var++); + err |= check_one_size_helper(9, size_add, 9, 0); + err |= check_one_size_helper(9, size_add, 0, 9); + err |= check_one_size_helper(5, size_add, 2, 3); + err |= check_one_size_helper(SIZE_MAX, size_add, SIZE_MAX, 1); + err |= check_one_size_helper(SIZE_MAX, size_add, SIZE_MAX, 3); + err |= check_one_size_helper(SIZE_MAX, size_add, SIZE_MAX, -3); + + var = 4; + err |= check_one_size_helper(1, size_sub, var--, 3); + err |= check_one_size_helper(1, size_sub, 4, var--); + err |= check_one_size_helper(1, size_sub, 3, 2); + err |= check_one_size_helper(9, size_sub, 9, 0); + err |= check_one_size_helper(SIZE_MAX, size_sub, 9, -3); + err |= check_one_size_helper(SIZE_MAX, size_sub, 0, 9); + err |= check_one_size_helper(SIZE_MAX, size_sub, 2, 3); + err |= check_one_size_helper(SIZE_MAX, size_sub, SIZE_MAX, 0); + err |= check_one_size_helper(SIZE_MAX, size_sub, SIZE_MAX, 10); + err |= check_one_size_helper(SIZE_MAX, size_sub, 0, SIZE_MAX); + err |= check_one_size_helper(SIZE_MAX, size_sub, 14, SIZE_MAX); + err |= check_one_size_helper(SIZE_MAX - 2, size_sub, SIZE_MAX - 1, 1); + err |= check_one_size_helper(SIZE_MAX 
- 4, size_sub, SIZE_MAX - 1, 3); + err |= check_one_size_helper(1, size_sub, SIZE_MAX - 1, -3); + + var = 4; + err |= check_one_size_helper(4 * sizeof(*obj->data), + flex_array_size, obj, data, var++); + err |= check_one_size_helper(5 * sizeof(*obj->data), + flex_array_size, obj, data, var++); + err |= check_one_size_helper(0, flex_array_size, obj, data, 0); + err |= check_one_size_helper(sizeof(*obj->data), + flex_array_size, obj, data, 1); + err |= check_one_size_helper(7 * sizeof(*obj->data), + flex_array_size, obj, data, 7); + err |= check_one_size_helper(SIZE_MAX, + flex_array_size, obj, data, -1); + err |= check_one_size_helper(SIZE_MAX, + flex_array_size, obj, data, SIZE_MAX - 4); + + var = 4; + err |= check_one_size_helper(sizeof(*obj) + (4 * sizeof(*obj->data)), + struct_size, obj, data, var++); + err |= check_one_size_helper(sizeof(*obj) + (5 * sizeof(*obj->data)), + struct_size, obj, data, var++); + err |= check_one_size_helper(sizeof(*obj), struct_size, obj, data, 0); + err |= check_one_size_helper(sizeof(*obj) + sizeof(*obj->data), + struct_size, obj, data, 1); + err |= check_one_size_helper(SIZE_MAX, + struct_size, obj, data, -3); + err |= check_one_size_helper(SIZE_MAX, + struct_size, obj, data, SIZE_MAX - 3); + + pr_info("%d overflow size helper tests finished\n", count); + + return err; +} + static int __init test_module_init(void) { int err = 0; err |= test_overflow_calculation(); err |= test_overflow_shift(); + err |= test_overflow_size_helpers(); err |= test_overflow_allocation(); if (err) { -- GitLab From 230f6fa2c1db6a3f3e668cfe95995ac8e6eee212 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 9 Feb 2022 16:40:41 -0800 Subject: [PATCH 0458/1586] overflow: Provide constant expression struct_size There have been cases where struct_size() (or flex_array_size()) needs to be calculated for an initializer, which requires it be a constant expression. 
This is possible when the "count" argument is a constant expression, so provide this ability for the helpers. Cc: Gustavo A. R. Silva Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Rasmus Villemoes Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Tested-by: Gustavo A. R. Silva Link: https://lore.kernel.org/lkml/20220210010407.GA701603@embeddedor --- include/linux/overflow.h | 10 +++++++--- lib/test_overflow.c | 26 +++++++++++++++++--------- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 59d7228104d02..f1221d11f8e57 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -4,6 +4,7 @@ #include #include +#include /* * We need to compute the minimum and maximum values representable in a given @@ -221,8 +222,9 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) * Return: number of bytes needed or SIZE_MAX on overflow. */ #define flex_array_size(p, member, count) \ - size_mul(count, \ - sizeof(*(p)->member) + __must_be_array((p)->member)) + __builtin_choose_expr(__is_constexpr(count), \ + (count) * sizeof(*(p)->member) + __must_be_array((p)->member), \ + size_mul(count, sizeof(*(p)->member) + __must_be_array((p)->member))) /** * struct_size() - Calculate size of structure with trailing flexible array. @@ -237,6 +239,8 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) * Return: number of bytes needed or SIZE_MAX on overflow. 
*/ #define struct_size(p, member, count) \ - size_add(sizeof(*(p)), flex_array_size(p, member, count)) + __builtin_choose_expr(__is_constexpr(count), \ + sizeof(*(p)) + flex_array_size(p, member, count), \ + size_add(sizeof(*(p)), flex_array_size(p, member, count))) #endif /* __LINUX_OVERFLOW_H */ diff --git a/lib/test_overflow.c b/lib/test_overflow.c index 712fb2351c271..f6530fce799db 100644 --- a/lib/test_overflow.c +++ b/lib/test_overflow.c @@ -602,10 +602,18 @@ struct __test_flex_array { static int __init test_overflow_size_helpers(void) { + /* Make sure struct_size() can be used in a constant expression. */ + u8 ce_array[struct_size((struct __test_flex_array *)0, data, 55)]; struct __test_flex_array *obj; int count = 0; int err = 0; int var; + volatile int unconst = 0; + + /* Verify constant expression against runtime version. */ + var = 55; + OPTIMIZER_HIDE_VAR(var); + err |= sizeof(ce_array) != struct_size(obj, data, var); #define check_one_size_helper(expected, func, args...) ({ \ bool __failure = false; \ @@ -663,28 +671,28 @@ static int __init test_overflow_size_helpers(void) flex_array_size, obj, data, var++); err |= check_one_size_helper(5 * sizeof(*obj->data), flex_array_size, obj, data, var++); - err |= check_one_size_helper(0, flex_array_size, obj, data, 0); + err |= check_one_size_helper(0, flex_array_size, obj, data, 0 + unconst); err |= check_one_size_helper(sizeof(*obj->data), - flex_array_size, obj, data, 1); + flex_array_size, obj, data, 1 + unconst); err |= check_one_size_helper(7 * sizeof(*obj->data), - flex_array_size, obj, data, 7); + flex_array_size, obj, data, 7 + unconst); err |= check_one_size_helper(SIZE_MAX, - flex_array_size, obj, data, -1); + flex_array_size, obj, data, -1 + unconst); err |= check_one_size_helper(SIZE_MAX, - flex_array_size, obj, data, SIZE_MAX - 4); + flex_array_size, obj, data, SIZE_MAX - 4 + unconst); var = 4; err |= check_one_size_helper(sizeof(*obj) + (4 * sizeof(*obj->data)), struct_size, obj, data, var++); 
err |= check_one_size_helper(sizeof(*obj) + (5 * sizeof(*obj->data)), struct_size, obj, data, var++); - err |= check_one_size_helper(sizeof(*obj), struct_size, obj, data, 0); + err |= check_one_size_helper(sizeof(*obj), struct_size, obj, data, 0 + unconst); err |= check_one_size_helper(sizeof(*obj) + sizeof(*obj->data), - struct_size, obj, data, 1); + struct_size, obj, data, 1 + unconst); err |= check_one_size_helper(SIZE_MAX, - struct_size, obj, data, -3); + struct_size, obj, data, -3 + unconst); err |= check_one_size_helper(SIZE_MAX, - struct_size, obj, data, SIZE_MAX - 3); + struct_size, obj, data, SIZE_MAX - 3 + unconst); pr_info("%d overflow size helper tests finished\n", count); -- GitLab From 248c793359daacd826a7507a258ffe41653efef7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Feb 2022 11:05:36 +0100 Subject: [PATCH 0459/1586] blk-mq: make the blk-mq stacking code optional The code to stack blk-mq drivers is only used by dm-multipath, and will preferably stay that way. Make it optional and only selected by device mapper, so that the buildbots more easily catch abuses like the one that slipped into the ufs driver in the last merge window. Another positive side effect is that kernel builds without device mapper shrink a little bit as well.
Signed-off-by: Christoph Hellwig Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20220215100540.3892965-2-hch@lst.de Signed-off-by: Jens Axboe --- block/Kconfig | 3 +++ block/blk-mq.c | 2 ++ drivers/md/Kconfig | 1 + 3 files changed, 6 insertions(+) diff --git a/block/Kconfig b/block/Kconfig index 205f8d01c6952..168b873eb666d 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -230,6 +230,9 @@ config BLK_PM config BLOCK_HOLDER_DEPRECATED bool +config BLK_MQ_STACKING + bool + source "block/Kconfig.iosched" endif # BLOCK diff --git a/block/blk-mq.c b/block/blk-mq.c index 6c59ffe765fde..db62d34afb637 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2840,6 +2840,7 @@ void blk_mq_submit_bio(struct bio *bio) blk_mq_try_issue_directly(rq->mq_hctx, rq)); } +#ifdef CONFIG_BLK_MQ_STACKING /** * blk_cloned_rq_check_limits - Helper function to check a cloned request * for the new queue limits @@ -3017,6 +3018,7 @@ free_and_out: return -ENOMEM; } EXPORT_SYMBOL_GPL(blk_rq_prep_clone); +#endif /* CONFIG_BLK_MQ_STACKING */ /* * Steal bios from a request and add them to a bio list. diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index b5ea378e66cb1..998a5cfdbc4e9 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -204,6 +204,7 @@ config BLK_DEV_DM tristate "Device mapper support" select BLOCK_HOLDER_DEPRECATED if SYSFS select BLK_DEV_DM_BUILTIN + select BLK_MQ_STACKING depends on DAX || DAX=n help Device-mapper is a low level volume manager. It works by allowing -- GitLab From a5efda3c46a1db9a579d953667906933d5037bf9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Feb 2022 11:05:37 +0100 Subject: [PATCH 0460/1586] blk-mq: fold blk_cloned_rq_check_limits into blk_insert_cloned_request Fold blk_cloned_rq_check_limits into its only caller. 
Signed-off-by: Christoph Hellwig Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20220215100540.3892965-3-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq.c | 38 +++++--------------------------------- 1 file changed, 5 insertions(+), 33 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index db62d34afb637..fc132933397fb 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2842,26 +2842,14 @@ void blk_mq_submit_bio(struct bio *bio) #ifdef CONFIG_BLK_MQ_STACKING /** - * blk_cloned_rq_check_limits - Helper function to check a cloned request - * for the new queue limits - * @q: the queue - * @rq: the request being checked - * - * Description: - * @rq may have been made based on weaker limitations of upper-level queues - * in request stacking drivers, and it may violate the limitation of @q. - * Since the block layer and the underlying device driver trust @rq - * after it is inserted to @q, it should be checked against @q before - * the insertion using this generic function. - * - * Request stacking drivers like request-based dm may change the queue - * limits when retrying requests on other queues. Those requests need - * to be checked against the new queue limits again during dispatch. 
+ * blk_insert_cloned_request - Helper for stacking drivers to submit a request + * @q: the queue to submit the request + * @rq: the request being queued */ -static blk_status_t blk_cloned_rq_check_limits(struct request_queue *q, - struct request *rq) +blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq) { unsigned int max_sectors = blk_queue_get_max_sectors(q, req_op(rq)); + blk_status_t ret; if (blk_rq_sectors(rq) > max_sectors) { /* @@ -2893,22 +2881,6 @@ static blk_status_t blk_cloned_rq_check_limits(struct request_queue *q, return BLK_STS_IOERR; } - return BLK_STS_OK; -} - -/** - * blk_insert_cloned_request - Helper for stacking drivers to submit a request - * @q: the queue to submit the request - * @rq: the request being queued - */ -blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq) -{ - blk_status_t ret; - - ret = blk_cloned_rq_check_limits(q, rq); - if (ret != BLK_STS_OK) - return ret; - if (rq->q->disk && should_fail_request(rq->q->disk->part0, blk_rq_bytes(rq))) return BLK_STS_IOERR; -- GitLab From 28db4711bf48303814dcfd8d41a41106e90bc374 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Feb 2022 11:05:38 +0100 Subject: [PATCH 0461/1586] blk-mq: remove the request_queue argument to blk_insert_cloned_request The request must be submitted to the queue it was allocated for, so remove the extra request_queue argument. 
Signed-off-by: Christoph Hellwig Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20220215100540.3892965-4-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq.c | 9 ++++----- drivers/md/dm-rq.c | 2 +- include/linux/blk-mq.h | 3 +-- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index fc132933397fb..886836a54064c 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2843,11 +2843,11 @@ void blk_mq_submit_bio(struct bio *bio) #ifdef CONFIG_BLK_MQ_STACKING /** * blk_insert_cloned_request - Helper for stacking drivers to submit a request - * @q: the queue to submit the request * @rq: the request being queued */ -blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq) +blk_status_t blk_insert_cloned_request(struct request *rq) { + struct request_queue *q = rq->q; unsigned int max_sectors = blk_queue_get_max_sectors(q, req_op(rq)); blk_status_t ret; @@ -2881,8 +2881,7 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request * return BLK_STS_IOERR; } - if (rq->q->disk && - should_fail_request(rq->q->disk->part0, blk_rq_bytes(rq))) + if (q->disk && should_fail_request(q->disk->part0, blk_rq_bytes(rq))) return BLK_STS_IOERR; if (blk_crypto_insert_cloned_request(rq)) @@ -2895,7 +2894,7 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request * * bypass a potential scheduler on the bottom device for * insert. 
*/ - blk_mq_run_dispatch_ops(rq->q, + blk_mq_run_dispatch_ops(q, ret = blk_mq_request_issue_directly(rq, true)); if (ret) blk_account_io_done(rq, ktime_get_ns()); diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 579ab6183d4d8..2fcc9b7f391b3 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -311,7 +311,7 @@ static blk_status_t dm_dispatch_clone_request(struct request *clone, struct requ clone->rq_flags |= RQF_IO_STAT; clone->start_time_ns = ktime_get_ns(); - r = blk_insert_cloned_request(clone->q, clone); + r = blk_insert_cloned_request(clone); if (r != BLK_STS_OK && r != BLK_STS_RESOURCE && r != BLK_STS_DEV_RESOURCE) /* must complete clone in terms of original request */ dm_complete_request(rq, r); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index d319ffa59354a..3a41d50b85d3a 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -952,8 +952,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src, struct bio_set *bs, gfp_t gfp_mask, int (*bio_ctr)(struct bio *, struct bio *, void *), void *data); void blk_rq_unprep_clone(struct request *rq); -blk_status_t blk_insert_cloned_request(struct request_queue *q, - struct request *rq); +blk_status_t blk_insert_cloned_request(struct request *rq); struct rq_map_data { struct page **pages; -- GitLab From 8803c89f365b344859decd5b3074e9bb3b65caa1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Feb 2022 11:05:39 +0100 Subject: [PATCH 0462/1586] dm: remove useless code from dm_dispatch_clone_request Both ->start_time_ns and the RQF_IO_STAT are set when the request is allocated using blk_mq_alloc_request by dm-mpath in blk_mq_rq_ctx_init. The block layer also ensures ->start_time_ns is only set when actually needed. 
Signed-off-by: Christoph Hellwig Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20220215100540.3892965-5-hch@lst.de Signed-off-by: Jens Axboe --- drivers/md/dm-rq.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 2fcc9b7f391b3..8f6117342d322 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -307,10 +307,6 @@ static blk_status_t dm_dispatch_clone_request(struct request *clone, struct requ { blk_status_t r; - if (blk_queue_io_stat(clone->q)) - clone->rq_flags |= RQF_IO_STAT; - - clone->start_time_ns = ktime_get_ns(); r = blk_insert_cloned_request(clone); if (r != BLK_STS_OK && r != BLK_STS_RESOURCE && r != BLK_STS_DEV_RESOURCE) /* must complete clone in terms of original request */ -- GitLab From 9f9adea7187ec1978bd3863f59b7fe27ccf33519 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Feb 2022 11:05:40 +0100 Subject: [PATCH 0463/1586] dm: remove dm_dispatch_clone_request Fold dm_dispatch_clone_request into its only caller, and use a switch statement to single dispatch for the handling of the different return values from blk_insert_cloned_request.
Signed-off-by: Christoph Hellwig Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20220215100540.3892965-6-hch@lst.de Signed-off-by: Jens Axboe --- drivers/md/dm-rq.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 8f6117342d322..6948d5db90925 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -303,17 +303,6 @@ static void end_clone_request(struct request *clone, blk_status_t error) dm_complete_request(tio->orig, error); } -static blk_status_t dm_dispatch_clone_request(struct request *clone, struct request *rq) -{ - blk_status_t r; - - r = blk_insert_cloned_request(clone); - if (r != BLK_STS_OK && r != BLK_STS_RESOURCE && r != BLK_STS_DEV_RESOURCE) - /* must complete clone in terms of original request */ - dm_complete_request(rq, r); - return r; -} - static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig, void *data) { @@ -394,13 +383,20 @@ static int map_request(struct dm_rq_target_io *tio) /* The target has remapped the I/O so dispatch it */ trace_block_rq_remap(clone, disk_devt(dm_disk(md)), blk_rq_pos(rq)); - ret = dm_dispatch_clone_request(clone, rq); - if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) { + ret = blk_insert_cloned_request(clone); + switch (ret) { + case BLK_STS_OK: + break; + case BLK_STS_RESOURCE: + case BLK_STS_DEV_RESOURCE: blk_rq_unprep_clone(clone); blk_mq_cleanup_rq(clone); tio->ti->type->release_clone_rq(clone, &tio->info); tio->clone = NULL; return DM_MAPIO_REQUEUE; + default: + /* must complete clone in terms of original request */ + dm_complete_request(rq, ret); } break; case DM_MAPIO_REQUEUE: -- GitLab From a650628bde77f6ac5b1d532092346feff7b58c52 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 16 Feb 2022 12:45:07 +0800 Subject: [PATCH 0464/1586] block: move submit_bio_checks() into submit_bio_noacct It is cleaner and more readable to check the bio when starting to submit it, instead of just before calling
->submit_bio() or blk_mq_submit_bio(). It also gives us a chance to optimize bio submission without checking the bio. Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220216044514.2903784-2-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/blk-core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 5a4a590416298..d4a023667ac11 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -797,9 +797,6 @@ static void __submit_bio(struct bio *bio) { struct gendisk *disk = bio->bi_bdev->bd_disk; - if (unlikely(!submit_bio_checks(bio))) - return; - if (!disk->fops->submit_bio) blk_mq_submit_bio(bio); else @@ -893,6 +890,9 @@ static void __submit_bio_noacct_mq(struct bio *bio) */ void submit_bio_noacct(struct bio *bio) { + if (unlikely(!submit_bio_checks(bio))) + return; + /* * We only want one ->submit_bio to be active at a time, else stack * usage with stacked devices could be a problem. Use current->bio_list -- GitLab From 7f36b7d02a287ed18d02ae821868aa07b0235521 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 16 Feb 2022 12:45:08 +0800 Subject: [PATCH 0465/1586] block: move blk_crypto_bio_prep() out of blk-mq.c blk_crypto_bio_prep() is called for both bio based and blk-mq drivers, so move it out of blk-mq.c, then we can unify this kind of handling.
Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220216044514.2903784-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/blk-core.c | 21 ++++++++------------- block/blk-mq.c | 3 --- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index d4a023667ac11..f03fff1fa3919 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -783,24 +783,19 @@ end_io: return false; } -static void __submit_bio_fops(struct gendisk *disk, struct bio *bio) -{ - if (blk_crypto_bio_prep(&bio)) { - if (likely(bio_queue_enter(bio) == 0)) { - disk->fops->submit_bio(bio); - blk_queue_exit(disk->queue); - } - } -} - static void __submit_bio(struct bio *bio) { struct gendisk *disk = bio->bi_bdev->bd_disk; - if (!disk->fops->submit_bio) + if (unlikely(!blk_crypto_bio_prep(&bio))) + return; + + if (!disk->fops->submit_bio) { blk_mq_submit_bio(bio); - else - __submit_bio_fops(disk, bio); + } else if (likely(bio_queue_enter(bio) == 0)) { + disk->fops->submit_bio(bio); + blk_queue_exit(disk->queue); + } } /* diff --git a/block/blk-mq.c b/block/blk-mq.c index 886836a54064c..7ca0b47246a61 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2788,9 +2788,6 @@ void blk_mq_submit_bio(struct bio *bio) unsigned int nr_segs = 1; blk_status_t ret; - if (unlikely(!blk_crypto_bio_prep(&bio))) - return; - blk_queue_bounce(q, &bio); if (blk_may_split(q, bio)) __blk_queue_split(q, &bio, &nr_segs); -- GitLab From 29ff23624e21c89d3321d6429dec8ad3847b534a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 16 Feb 2022 12:45:09 +0800 Subject: [PATCH 0466/1586] block: don't declare submit_bio_checks in local header submit_bio_checks() won't be called outside of block/blk-core.c any more since commit 9d497e2941c3 ("block: don't protect submit_bio_checks by q_usage_counter"), so mark it as one local helper. 
Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220216044514.2903784-4-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- block/blk.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index f03fff1fa3919..5248b94d276bd 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -676,7 +676,7 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q, return BLK_STS_OK; } -noinline_for_stack bool submit_bio_checks(struct bio *bio) +static noinline_for_stack bool submit_bio_checks(struct bio *bio) { struct block_device *bdev = bio->bi_bdev; struct request_queue *q = bdev_get_queue(bdev); diff --git a/block/blk.h b/block/blk.h index abb663a2a147b..b2516cb4f98e6 100644 --- a/block/blk.h +++ b/block/blk.h @@ -46,7 +46,6 @@ void blk_freeze_queue(struct request_queue *q); void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic); void blk_queue_start_drain(struct request_queue *q); int __bio_queue_enter(struct request_queue *q, struct bio *bio); -bool submit_bio_checks(struct bio *bio); static inline bool blk_try_enter_queue(struct request_queue *q, bool pm) { -- GitLab From 3f98c753717c600eb5708e9b78b3eba6664bddf1 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 16 Feb 2022 12:45:10 +0800 Subject: [PATCH 0467/1586] block: don't check bio in blk_throtl_dispatch_work_fn The bio has been checked already before throttling, so no need to check it again before dispatching it from throttle queue. Add a helper of submit_bio_noacct_nocheck() for this purpose. 
Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220216044514.2903784-5-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/blk-core.c | 30 +++++++++++++++++------------- block/blk-throttle.c | 2 +- block/blk.h | 1 + 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 5248b94d276bd..72b7b2214c708 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -874,20 +874,8 @@ static void __submit_bio_noacct_mq(struct bio *bio) current->bio_list = NULL; } -/** - * submit_bio_noacct - re-submit a bio to the block device layer for I/O - * @bio: The bio describing the location in memory and on the device. - * - * This is a version of submit_bio() that shall only be used for I/O that is - * resubmitted to lower level drivers by stacking block drivers. All file - * systems and other upper level users of the block layer should use - * submit_bio() instead. - */ -void submit_bio_noacct(struct bio *bio) +void submit_bio_noacct_nocheck(struct bio *bio) { - if (unlikely(!submit_bio_checks(bio))) - return; - /* * We only want one ->submit_bio to be active at a time, else stack * usage with stacked devices could be a problem. Use current->bio_list @@ -901,6 +889,22 @@ void submit_bio_noacct(struct bio *bio) else __submit_bio_noacct(bio); } + +/** + * submit_bio_noacct - re-submit a bio to the block device layer for I/O + * @bio: The bio describing the location in memory and on the device. + * + * This is a version of submit_bio() that shall only be used for I/O that is + * resubmitted to lower level drivers by stacking block drivers. All file + * systems and other upper level users of the block layer should use + * submit_bio() instead. 
+ */ +void submit_bio_noacct(struct bio *bio) +{ + if (unlikely(!submit_bio_checks(bio))) + return; + submit_bio_noacct_nocheck(bio); +} EXPORT_SYMBOL(submit_bio_noacct); /** diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 73640d80e99ec..8770768f10009 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -1218,7 +1218,7 @@ static void blk_throtl_dispatch_work_fn(struct work_struct *work) if (!bio_list_empty(&bio_list_on_stack)) { blk_start_plug(&plug); while ((bio = bio_list_pop(&bio_list_on_stack))) - submit_bio_noacct(bio); + submit_bio_noacct_nocheck(bio); blk_finish_plug(&plug); } } diff --git a/block/blk.h b/block/blk.h index b2516cb4f98e6..ebaa59ca46ca6 100644 --- a/block/blk.h +++ b/block/blk.h @@ -46,6 +46,7 @@ void blk_freeze_queue(struct request_queue *q); void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic); void blk_queue_start_drain(struct request_queue *q); int __bio_queue_enter(struct request_queue *q, struct bio *bio); +void submit_bio_noacct_nocheck(struct bio *bio); static inline bool blk_try_enter_queue(struct request_queue *q, bool pm) { -- GitLab From d24c670ec1f9f1dc320e59004e61f3491ae24546 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 16 Feb 2022 12:45:11 +0800 Subject: [PATCH 0468/1586] block: merge submit_bio_checks() into submit_bio_noacct Now submit_bio_checks() is only called by submit_bio_noacct(), so merge it into submit_bio_noacct(). 
Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220216044514.2903784-6-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/blk-core.c | 209 +++++++++++++++++++++++------------------------ 1 file changed, 101 insertions(+), 108 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 72b7b2214c708..94bf37f8e61d2 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -676,113 +676,6 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q, return BLK_STS_OK; } -static noinline_for_stack bool submit_bio_checks(struct bio *bio) -{ - struct block_device *bdev = bio->bi_bdev; - struct request_queue *q = bdev_get_queue(bdev); - blk_status_t status = BLK_STS_IOERR; - struct blk_plug *plug; - - might_sleep(); - - plug = blk_mq_plug(q, bio); - if (plug && plug->nowait) - bio->bi_opf |= REQ_NOWAIT; - - /* - * For a REQ_NOWAIT based request, return -EOPNOTSUPP - * if queue does not support NOWAIT. - */ - if ((bio->bi_opf & REQ_NOWAIT) && !blk_queue_nowait(q)) - goto not_supported; - - if (should_fail_bio(bio)) - goto end_io; - if (unlikely(bio_check_ro(bio))) - goto end_io; - if (!bio_flagged(bio, BIO_REMAPPED)) { - if (unlikely(bio_check_eod(bio))) - goto end_io; - if (bdev->bd_partno && unlikely(blk_partition_remap(bio))) - goto end_io; - } - - /* - * Filter flush bio's early so that bio based drivers without flush - * support don't have to worry about them. 
- */ - if (op_is_flush(bio->bi_opf) && - !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) { - bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA); - if (!bio_sectors(bio)) { - status = BLK_STS_OK; - goto end_io; - } - } - - if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) - bio_clear_polled(bio); - - switch (bio_op(bio)) { - case REQ_OP_DISCARD: - if (!blk_queue_discard(q)) - goto not_supported; - break; - case REQ_OP_SECURE_ERASE: - if (!blk_queue_secure_erase(q)) - goto not_supported; - break; - case REQ_OP_WRITE_SAME: - if (!q->limits.max_write_same_sectors) - goto not_supported; - break; - case REQ_OP_ZONE_APPEND: - status = blk_check_zone_append(q, bio); - if (status != BLK_STS_OK) - goto end_io; - break; - case REQ_OP_ZONE_RESET: - case REQ_OP_ZONE_OPEN: - case REQ_OP_ZONE_CLOSE: - case REQ_OP_ZONE_FINISH: - if (!blk_queue_is_zoned(q)) - goto not_supported; - break; - case REQ_OP_ZONE_RESET_ALL: - if (!blk_queue_is_zoned(q) || !blk_queue_zone_resetall(q)) - goto not_supported; - break; - case REQ_OP_WRITE_ZEROES: - if (!q->limits.max_write_zeroes_sectors) - goto not_supported; - break; - default: - break; - } - - if (blk_throtl_bio(bio)) - return false; - - blk_cgroup_bio_start(bio); - blkcg_bio_issue_init(bio); - - if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) { - trace_block_bio_queue(bio); - /* Now that enqueuing has been traced, we need to trace - * completion as well. 
- */ - bio_set_flag(bio, BIO_TRACE_COMPLETION); - } - return true; - -not_supported: - status = BLK_STS_NOTSUPP; -end_io: - bio->bi_status = status; - bio_endio(bio); - return false; -} - static void __submit_bio(struct bio *bio) { struct gendisk *disk = bio->bi_bdev->bd_disk; @@ -901,9 +794,109 @@ void submit_bio_noacct_nocheck(struct bio *bio) */ void submit_bio_noacct(struct bio *bio) { - if (unlikely(!submit_bio_checks(bio))) + struct block_device *bdev = bio->bi_bdev; + struct request_queue *q = bdev_get_queue(bdev); + blk_status_t status = BLK_STS_IOERR; + struct blk_plug *plug; + + might_sleep(); + + plug = blk_mq_plug(q, bio); + if (plug && plug->nowait) + bio->bi_opf |= REQ_NOWAIT; + + /* + * For a REQ_NOWAIT based request, return -EOPNOTSUPP + * if queue does not support NOWAIT. + */ + if ((bio->bi_opf & REQ_NOWAIT) && !blk_queue_nowait(q)) + goto not_supported; + + if (should_fail_bio(bio)) + goto end_io; + if (unlikely(bio_check_ro(bio))) + goto end_io; + if (!bio_flagged(bio, BIO_REMAPPED)) { + if (unlikely(bio_check_eod(bio))) + goto end_io; + if (bdev->bd_partno && unlikely(blk_partition_remap(bio))) + goto end_io; + } + + /* + * Filter flush bio's early so that bio based drivers without flush + * support don't have to worry about them. 
+ */ + if (op_is_flush(bio->bi_opf) && + !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) { + bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA); + if (!bio_sectors(bio)) { + status = BLK_STS_OK; + goto end_io; + } + } + + if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) + bio_clear_polled(bio); + + switch (bio_op(bio)) { + case REQ_OP_DISCARD: + if (!blk_queue_discard(q)) + goto not_supported; + break; + case REQ_OP_SECURE_ERASE: + if (!blk_queue_secure_erase(q)) + goto not_supported; + break; + case REQ_OP_WRITE_SAME: + if (!q->limits.max_write_same_sectors) + goto not_supported; + break; + case REQ_OP_ZONE_APPEND: + status = blk_check_zone_append(q, bio); + if (status != BLK_STS_OK) + goto end_io; + break; + case REQ_OP_ZONE_RESET: + case REQ_OP_ZONE_OPEN: + case REQ_OP_ZONE_CLOSE: + case REQ_OP_ZONE_FINISH: + if (!blk_queue_is_zoned(q)) + goto not_supported; + break; + case REQ_OP_ZONE_RESET_ALL: + if (!blk_queue_is_zoned(q) || !blk_queue_zone_resetall(q)) + goto not_supported; + break; + case REQ_OP_WRITE_ZEROES: + if (!q->limits.max_write_zeroes_sectors) + goto not_supported; + break; + default: + break; + } + + if (blk_throtl_bio(bio)) return; + + blk_cgroup_bio_start(bio); + blkcg_bio_issue_init(bio); + + if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) { + trace_block_bio_queue(bio); + /* Now that enqueuing has been traced, we need to trace + * completion as well. 
+ */ + bio_set_flag(bio, BIO_TRACE_COMPLETION); + } submit_bio_noacct_nocheck(bio); + return; + +not_supported: + status = BLK_STS_NOTSUPP; +end_io: + bio->bi_status = status; + bio_endio(bio); } EXPORT_SYMBOL(submit_bio_noacct); -- GitLab From 9f5ede3c01f9951b0ae7d68b28762ad51d9bacc8 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 16 Feb 2022 12:45:12 +0800 Subject: [PATCH 0469/1586] block: throttle split bio in case of iops limit Commit 111be8839817 ("block-throttle: avoid double charge") marks a bio as BIO_THROTTLED unconditionally if __blk_throtl_bio() is called on it, so this bio won't be passed to __blk_throtl_bio() any more. This avoids double charging in case of bio splitting. That is reasonable for the read/write throughput limit, but not for the IOPS limit, because the block layer accounts io against each split bio. Chunguang Xu has already observed this issue and fixed it in commit 4f1e9630afe6 ("blk-throtl: optimize IOPS throttle for large IO scenarios"). However, that patch only covers bio splitting in __blk_queue_split(), and there are other kinds of bio splitting, such as bio_split() & submit_bio_noacct(), among others. This patch tries to fix the issue in one generic way by always charging the bio for the iops limit in blk_throtl_bio(). This is reasonable: a re-submitted or fast-cloned bio is charged if it is submitted to the same disk/queue, and BIO_THROTTLED will be cleared if bio->bi_bdev is changed. This new approach gives a much smoother and more stable iops limit compared with commit 4f1e9630afe6 ("blk-throtl: optimize IOPS throttle for large IO scenarios"), since that commit can't actually throttle the current split bios. Also, this approach won't cause a new double bio iops charge in blk_throtl_dispatch_work_fn(), in which blk_throtl_bio() won't be called any more.
Reported-by: Ning Li Acked-by: Tejun Heo Cc: Chunguang Xu Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20220216044514.2903784-7-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/blk-merge.c | 2 -- block/blk-throttle.c | 10 +++++++--- block/blk-throttle.h | 2 -- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index 4de34a332c9fd..f5255991b773c 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -368,8 +368,6 @@ void __blk_queue_split(struct request_queue *q, struct bio **bio, trace_block_split(split, (*bio)->bi_iter.bi_sector); submit_bio_noacct(*bio); *bio = split; - - blk_throtl_charge_bio_split(*bio); } } diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 8770768f10009..c7aa26d52e845 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -807,7 +807,8 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio, unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd; unsigned int bio_size = throtl_bio_data_size(bio); - if (bps_limit == U64_MAX) { + /* no need to throttle if this bio's bytes have been accounted */ + if (bps_limit == U64_MAX || bio_flagged(bio, BIO_THROTTLED)) { if (wait) *wait = 0; return true; @@ -919,9 +920,12 @@ static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio) unsigned int bio_size = throtl_bio_data_size(bio); /* Charge the bio to the group */ - tg->bytes_disp[rw] += bio_size; + if (!bio_flagged(bio, BIO_THROTTLED)) { + tg->bytes_disp[rw] += bio_size; + tg->last_bytes_disp[rw] += bio_size; + } + tg->io_disp[rw]++; - tg->last_bytes_disp[rw] += bio_size; tg->last_io_disp[rw]++; /* diff --git a/block/blk-throttle.h b/block/blk-throttle.h index 175f03abd9e41..cb43f4417d6ea 100644 --- a/block/blk-throttle.h +++ b/block/blk-throttle.h @@ -170,8 +170,6 @@ static inline bool blk_throtl_bio(struct bio *bio) { struct throtl_grp *tg = blkg_to_tg(bio->bi_blkg); - if (bio_flagged(bio, BIO_THROTTLED)) - return false; if 
(!tg->has_rules[bio_data_dir(bio)]) return false; -- GitLab From 5a93b6027eb4ef5db60a4bc5bdbeba5fb9f29384 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 16 Feb 2022 12:45:13 +0800 Subject: [PATCH 0470/1586] block: don't try to throttle split bio if iops limit isn't set We need to throttle split bio in case of IOPS limit even though the split bio has been marked as BIO_THROTTLED since block layer accounts split bio actually. If only throughput throttle is setup, no need to throttle any more if BIO_THROTTLED is set since we have accounted & considered the whole bio bytes already. Add one flag of THROTL_TG_HAS_IOPS_LIMIT for serving this purpose. Acked-by: Tejun Heo Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20220216044514.2903784-8-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/blk-throttle.c | 21 ++++++++++++++------- block/blk-throttle.h | 11 +++++++++++ 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index c7aa26d52e845..ec72eced24d2c 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -41,11 +41,6 @@ /* A workqueue to queue throttle related work */ static struct workqueue_struct *kthrotld_workqueue; -enum tg_state_flags { - THROTL_TG_PENDING = 1 << 0, /* on parent's pending tree */ - THROTL_TG_WAS_EMPTY = 1 << 1, /* bio_lists[] became non-empty */ -}; - #define rb_entry_tg(node) rb_entry((node), struct throtl_grp, rb_node) /* We measure latency for request size from <= 4k to >= 1M */ @@ -425,12 +420,24 @@ static void tg_update_has_rules(struct throtl_grp *tg) struct throtl_grp *parent_tg = sq_to_tg(tg->service_queue.parent_sq); struct throtl_data *td = tg->td; int rw; + int has_iops_limit = 0; + + for (rw = READ; rw <= WRITE; rw++) { + unsigned int iops_limit = tg_iops_limit(tg, rw); - for (rw = READ; rw <= WRITE; rw++) tg->has_rules[rw] = (parent_tg && parent_tg->has_rules[rw]) || (td->limit_valid[td->limit_index] && (tg_bps_limit(tg, rw) != U64_MAX || - 
tg_iops_limit(tg, rw) != UINT_MAX)); + iops_limit != UINT_MAX)); + + if (iops_limit != UINT_MAX) + has_iops_limit = 1; + } + + if (has_iops_limit) + tg->flags |= THROTL_TG_HAS_IOPS_LIMIT; + else + tg->flags &= ~THROTL_TG_HAS_IOPS_LIMIT; } static void throtl_pd_online(struct blkg_policy_data *pd) diff --git a/block/blk-throttle.h b/block/blk-throttle.h index cb43f4417d6ea..c996a15f290e8 100644 --- a/block/blk-throttle.h +++ b/block/blk-throttle.h @@ -52,6 +52,12 @@ struct throtl_service_queue { struct timer_list pending_timer; /* fires on first_pending_disptime */ }; +enum tg_state_flags { + THROTL_TG_PENDING = 1 << 0, /* on parent's pending tree */ + THROTL_TG_WAS_EMPTY = 1 << 1, /* bio_lists[] became non-empty */ + THROTL_TG_HAS_IOPS_LIMIT = 1 << 2, /* tg has iops limit */ +}; + enum { LIMIT_LOW, LIMIT_MAX, @@ -170,6 +176,11 @@ static inline bool blk_throtl_bio(struct bio *bio) { struct throtl_grp *tg = blkg_to_tg(bio->bi_blkg); + /* no need to throttle bps any more if the bio has been throttled */ + if (bio_flagged(bio, BIO_THROTTLED) && + !(tg->flags & THROTL_TG_HAS_IOPS_LIMIT)) + return false; + if (!tg->has_rules[bio_data_dir(bio)]) return false; -- GitLab From 34841e6fb125aa3f0e33e4eaac9f5eb86b2bb34b Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 16 Feb 2022 12:45:14 +0800 Subject: [PATCH 0471/1586] block: revert 4f1e9630afe6 ("blk-throtl: optimize IOPS throttle for large IO scenarios") Revert commit 4f1e9630afe6 ("blk-throtl: optimize IOPS throttle for large IO scenarios") since we have another easier way to address this issue and get better iops throttling result. 
Acked-by: Tejun Heo Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20220216044514.2903784-9-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/blk-throttle.c | 28 ---------------------------- block/blk-throttle.h | 5 ----- 2 files changed, 33 deletions(-) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index ec72eced24d2c..a3b3ebc72dd47 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -640,8 +640,6 @@ static inline void throtl_start_new_slice_with_credit(struct throtl_grp *tg, tg->bytes_disp[rw] = 0; tg->io_disp[rw] = 0; - atomic_set(&tg->io_split_cnt[rw], 0); - /* * Previous slice has expired. We must have trimmed it after last * bio dispatch. That means since start of last slice, we never used @@ -665,8 +663,6 @@ static inline void throtl_start_new_slice(struct throtl_grp *tg, bool rw) tg->slice_start[rw] = jiffies; tg->slice_end[rw] = jiffies + tg->td->throtl_slice; - atomic_set(&tg->io_split_cnt[rw], 0); - throtl_log(&tg->service_queue, "[%c] new slice start=%lu end=%lu jiffies=%lu", rw == READ ? 
'R' : 'W', tg->slice_start[rw], @@ -900,9 +896,6 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio, jiffies + tg->td->throtl_slice); } - if (iops_limit != UINT_MAX) - tg->io_disp[rw] += atomic_xchg(&tg->io_split_cnt[rw], 0); - if (tg_with_in_bps_limit(tg, bio, bps_limit, &bps_wait) && tg_with_in_iops_limit(tg, bio, iops_limit, &iops_wait)) { if (wait) @@ -1927,14 +1920,12 @@ static void throtl_downgrade_check(struct throtl_grp *tg) } if (tg->iops[READ][LIMIT_LOW]) { - tg->last_io_disp[READ] += atomic_xchg(&tg->last_io_split_cnt[READ], 0); iops = tg->last_io_disp[READ] * HZ / elapsed_time; if (iops >= tg->iops[READ][LIMIT_LOW]) tg->last_low_overflow_time[READ] = now; } if (tg->iops[WRITE][LIMIT_LOW]) { - tg->last_io_disp[WRITE] += atomic_xchg(&tg->last_io_split_cnt[WRITE], 0); iops = tg->last_io_disp[WRITE] * HZ / elapsed_time; if (iops >= tg->iops[WRITE][LIMIT_LOW]) tg->last_low_overflow_time[WRITE] = now; @@ -2053,25 +2044,6 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td) } #endif -void blk_throtl_charge_bio_split(struct bio *bio) -{ - struct blkcg_gq *blkg = bio->bi_blkg; - struct throtl_grp *parent = blkg_to_tg(blkg); - struct throtl_service_queue *parent_sq; - bool rw = bio_data_dir(bio); - - do { - if (!parent->has_rules[rw]) - break; - - atomic_inc(&parent->io_split_cnt[rw]); - atomic_inc(&parent->last_io_split_cnt[rw]); - - parent_sq = parent->service_queue.parent_sq; - parent = sq_to_tg(parent_sq); - } while (parent); -} - bool __blk_throtl_bio(struct bio *bio) { struct request_queue *q = bdev_get_queue(bio->bi_bdev); diff --git a/block/blk-throttle.h b/block/blk-throttle.h index c996a15f290e8..b23a9f3abb824 100644 --- a/block/blk-throttle.h +++ b/block/blk-throttle.h @@ -138,9 +138,6 @@ struct throtl_grp { unsigned int bad_bio_cnt; /* bios exceeding latency threshold */ unsigned long bio_cnt_reset_time; - atomic_t io_split_cnt[2]; - atomic_t last_io_split_cnt[2]; - struct blkg_rwstat stat_bytes; struct 
blkg_rwstat stat_ios; }; @@ -164,13 +161,11 @@ static inline struct throtl_grp *blkg_to_tg(struct blkcg_gq *blkg) static inline int blk_throtl_init(struct request_queue *q) { return 0; } static inline void blk_throtl_exit(struct request_queue *q) { } static inline void blk_throtl_register_queue(struct request_queue *q) { } -static inline void blk_throtl_charge_bio_split(struct bio *bio) { } static inline bool blk_throtl_bio(struct bio *bio) { return false; } #else /* CONFIG_BLK_DEV_THROTTLING */ int blk_throtl_init(struct request_queue *q); void blk_throtl_exit(struct request_queue *q); void blk_throtl_register_queue(struct request_queue *q); -void blk_throtl_charge_bio_split(struct bio *bio); bool __blk_throtl_bio(struct bio *bio); static inline bool blk_throtl_bio(struct bio *bio) { -- GitLab From 76792055c4c8b2472ca1ae48e0ddaf8497529f08 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Feb 2022 10:45:10 +0100 Subject: [PATCH 0472/1586] block: add a ->free_disk method Add a method to notify the driver that the gendisk is about to be freed. This allows drivers to tie the lifetime of their private data to that of the gendisk and thus deal with device removal races without expensive synchronization and boilerplate code. A new flag is added so that ->free_disk is only called after a successful call to add_disk, which significantly simplifies the error handling path during probing. 
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220215094514.3828912-2-hch@lst.de Signed-off-by: Jens Axboe --- block/genhd.c | 5 +++++ include/linux/blkdev.h | 2 ++ 2 files changed, 7 insertions(+) diff --git a/block/genhd.c b/block/genhd.c index 9589d1d59afab..e351fac41bf25 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -526,6 +526,7 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk, disk_update_readahead(disk); disk_add_events(disk); + set_bit(GD_ADDED, &disk->state); return 0; out_unregister_bdi: @@ -1119,6 +1120,10 @@ static void disk_release(struct device *dev) xa_destroy(&disk->part_tbl); disk->queue->disk = NULL; blk_put_queue(disk->queue); + + if (test_bit(GD_ADDED, &disk->state) && disk->fops->free_disk) + disk->fops->free_disk(disk); + iput(disk->part0->bd_inode); /* frees the disk */ } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3bfc75a2a4509..f757f9c2871f8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -146,6 +146,7 @@ struct gendisk { #define GD_READ_ONLY 1 #define GD_DEAD 2 #define GD_NATIVE_CAPACITY 3 +#define GD_ADDED 4 struct mutex open_mutex; /* open/close mutex */ unsigned open_partitions; /* number of open partitions */ @@ -1464,6 +1465,7 @@ struct block_device_operations { void (*unlock_native_capacity) (struct gendisk *); int (*getgeo)(struct block_device *, struct hd_geometry *); int (*set_read_only)(struct block_device *bdev, bool ro); + void (*free_disk)(struct gendisk *disk); /* this callback is with swap_lock and sometimes page table lock held */ void (*swap_slot_free_notify) (struct block_device *, unsigned long); int (*report_zones)(struct gendisk *, sector_t sector, -- GitLab From e2efa0796607efe60c708271be483c3a2b0128de Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Feb 2022 10:45:11 +0100 Subject: [PATCH 0473/1586] memstick/ms_block: simplify refcounting Implement the ->free_disk method to free the msb_data structure only 
once the last gendisk reference goes away instead of keeping a local refcount. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220215094514.3828912-3-hch@lst.de Signed-off-by: Jens Axboe --- drivers/memstick/core/ms_block.c | 64 ++++++++------------------------ drivers/memstick/core/ms_block.h | 1 - 2 files changed, 15 insertions(+), 50 deletions(-) diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c index 0cda6c6baefc3..3993bdd4b519c 100644 --- a/drivers/memstick/core/ms_block.c +++ b/drivers/memstick/core/ms_block.c @@ -1943,22 +1943,6 @@ static void msb_io_work(struct work_struct *work) static DEFINE_IDR(msb_disk_idr); /*set of used disk numbers */ static DEFINE_MUTEX(msb_disk_lock); /* protects against races in open/release */ -static int msb_bd_open(struct block_device *bdev, fmode_t mode) -{ - struct gendisk *disk = bdev->bd_disk; - struct msb_data *msb = disk->private_data; - - dbg_verbose("block device open"); - - mutex_lock(&msb_disk_lock); - - if (msb && msb->card) - msb->usage_count++; - - mutex_unlock(&msb_disk_lock); - return 0; -} - static void msb_data_clear(struct msb_data *msb) { kfree(msb->boot_page); @@ -1968,33 +1952,6 @@ static void msb_data_clear(struct msb_data *msb) msb->card = NULL; } -static int msb_disk_release(struct gendisk *disk) -{ - struct msb_data *msb = disk->private_data; - - dbg_verbose("block device release"); - mutex_lock(&msb_disk_lock); - - if (msb) { - if (msb->usage_count) - msb->usage_count--; - - if (!msb->usage_count) { - disk->private_data = NULL; - idr_remove(&msb_disk_idr, msb->disk_id); - put_disk(disk); - kfree(msb); - } - } - mutex_unlock(&msb_disk_lock); - return 0; -} - -static void msb_bd_release(struct gendisk *disk, fmode_t mode) -{ - msb_disk_release(disk); -} - static int msb_bd_getgeo(struct block_device *bdev, struct hd_geometry *geo) { @@ -2003,6 +1960,17 @@ static int msb_bd_getgeo(struct block_device *bdev, return 0; } +static void msb_bd_free_disk(struct 
gendisk *disk) +{ + struct msb_data *msb = disk->private_data; + + mutex_lock(&msb_disk_lock); + idr_remove(&msb_disk_idr, msb->disk_id); + mutex_unlock(&msb_disk_lock); + + kfree(msb); +} + static blk_status_t msb_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -2096,10 +2064,9 @@ static void msb_start(struct memstick_dev *card) } static const struct block_device_operations msb_bdops = { - .open = msb_bd_open, - .release = msb_bd_release, - .getgeo = msb_bd_getgeo, - .owner = THIS_MODULE + .owner = THIS_MODULE, + .getgeo = msb_bd_getgeo, + .free_disk = msb_bd_free_disk, }; static const struct blk_mq_ops msb_mq_ops = { @@ -2147,7 +2114,6 @@ static int msb_init_disk(struct memstick_dev *card) set_capacity(msb->disk, capacity); dbg("Set total disk size to %lu sectors", capacity); - msb->usage_count = 1; msb->io_queue = alloc_ordered_workqueue("ms_block", WQ_MEM_RECLAIM); INIT_WORK(&msb->io_work, msb_io_work); sg_init_table(msb->prealloc_sg, MS_BLOCK_MAX_SEGS+1); @@ -2229,7 +2195,7 @@ static void msb_remove(struct memstick_dev *card) msb_data_clear(msb); mutex_unlock(&msb_disk_lock); - msb_disk_release(msb->disk); + put_disk(msb->disk); memstick_set_drvdata(card, NULL); } diff --git a/drivers/memstick/core/ms_block.h b/drivers/memstick/core/ms_block.h index 122e1a8a8bd5b..7058f9aefeb92 100644 --- a/drivers/memstick/core/ms_block.h +++ b/drivers/memstick/core/ms_block.h @@ -143,7 +143,6 @@ struct ms_boot_page { } __packed; struct msb_data { - unsigned int usage_count; struct memstick_dev *card; struct gendisk *disk; struct request_queue *queue; -- GitLab From 6dab421bfe06a59bf8f212a72e34673e8acf2018 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Feb 2022 10:45:12 +0100 Subject: [PATCH 0474/1586] memstick/mspro_block: fix handling of read-only devices Use set_disk_ro to propagate the read-only state to the block layer instead of checking for it in ->open and leaking a reference in case of a read-only device. 
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220215094514.3828912-4-hch@lst.de Signed-off-by: Jens Axboe --- drivers/memstick/core/mspro_block.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index c0450397b6735..7ea312f0840e0 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -186,13 +186,8 @@ static int mspro_block_bd_open(struct block_device *bdev, fmode_t mode) mutex_lock(&mspro_block_disk_lock); - if (msb && msb->card) { + if (msb && msb->card) msb->usage_count++; - if ((mode & FMODE_WRITE) && msb->read_only) - rc = -EROFS; - else - rc = 0; - } mutex_unlock(&mspro_block_disk_lock); @@ -1239,6 +1234,9 @@ static int mspro_block_init_disk(struct memstick_dev *card) set_capacity(msb->disk, capacity); dev_dbg(&card->dev, "capacity set %ld\n", capacity); + if (msb->read_only) + set_disk_ro(msb->disk, true); + rc = device_add_disk(&card->dev, msb->disk, NULL); if (rc) goto out_cleanup_disk; -- GitLab From 185ed423d1898ead071c18f6161959cd3cab2dde Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Feb 2022 10:45:13 +0100 Subject: [PATCH 0475/1586] memstick/mspro_block: simplify refcounting Implement the ->free_disk method to free the msb_data structure only once the last gendisk reference goes away instead of keeping a local refcount. 
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220215094514.3828912-5-hch@lst.de Signed-off-by: Jens Axboe --- drivers/memstick/core/mspro_block.c | 49 +++++------------------------ 1 file changed, 7 insertions(+), 42 deletions(-) diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index 7ea312f0840e0..725ba74ded308 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -133,7 +133,6 @@ struct mspro_devinfo { struct mspro_block_data { struct memstick_dev *card; - unsigned int usage_count; unsigned int caps; struct gendisk *disk; struct request_queue *queue; @@ -178,48 +177,16 @@ static int mspro_block_complete_req(struct memstick_dev *card, int error); /*** Block device ***/ -static int mspro_block_bd_open(struct block_device *bdev, fmode_t mode) -{ - struct gendisk *disk = bdev->bd_disk; - struct mspro_block_data *msb = disk->private_data; - int rc = -ENXIO; - - mutex_lock(&mspro_block_disk_lock); - - if (msb && msb->card) - msb->usage_count++; - - mutex_unlock(&mspro_block_disk_lock); - - return rc; -} - - -static void mspro_block_disk_release(struct gendisk *disk) +static void mspro_block_bd_free_disk(struct gendisk *disk) { struct mspro_block_data *msb = disk->private_data; int disk_id = MINOR(disk_devt(disk)) >> MSPRO_BLOCK_PART_SHIFT; mutex_lock(&mspro_block_disk_lock); - - if (msb) { - if (msb->usage_count) - msb->usage_count--; - - if (!msb->usage_count) { - kfree(msb); - disk->private_data = NULL; - idr_remove(&mspro_block_disk_idr, disk_id); - put_disk(disk); - } - } - + idr_remove(&mspro_block_disk_idr, disk_id); mutex_unlock(&mspro_block_disk_lock); -} -static void mspro_block_bd_release(struct gendisk *disk, fmode_t mode) -{ - mspro_block_disk_release(disk); + kfree(msb); } static int mspro_block_bd_getgeo(struct block_device *bdev, @@ -235,10 +202,9 @@ static int mspro_block_bd_getgeo(struct block_device *bdev, } static const struct block_device_operations 
ms_block_bdops = { - .open = mspro_block_bd_open, - .release = mspro_block_bd_release, - .getgeo = mspro_block_bd_getgeo, - .owner = THIS_MODULE + .owner = THIS_MODULE, + .getgeo = mspro_block_bd_getgeo, + .free_disk = mspro_block_bd_free_disk, }; /*** Information ***/ @@ -1221,7 +1187,6 @@ static int mspro_block_init_disk(struct memstick_dev *card) msb->disk->first_minor = disk_id << MSPRO_BLOCK_PART_SHIFT; msb->disk->minors = 1 << MSPRO_BLOCK_PART_SHIFT; msb->disk->fops = &ms_block_bdops; - msb->usage_count = 1; msb->disk->private_data = msb; sprintf(msb->disk->disk_name, "mspblk%d", disk_id); @@ -1339,7 +1304,7 @@ static void mspro_block_remove(struct memstick_dev *card) mspro_block_data_clear(msb); mutex_unlock(&mspro_block_disk_lock); - mspro_block_disk_release(msb->disk); + put_disk(msb->disk); memstick_set_drvdata(card, NULL); } -- GitLab From 24b45e6c25173abcf8d5e82285212b47f2b0f86b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Feb 2022 10:45:14 +0100 Subject: [PATCH 0476/1586] virtio_blk: simplify refcounting Implement the ->free_disk method to free the virtio_blk structure only once the last gendisk reference goes away instead of keeping a local refcount. Signed-off-by: Christoph Hellwig Reviewed-by: Stefan Hajnoczi Acked-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/20220215094514.3828912-6-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/virtio_blk.c | 66 ++++++++------------------------------ 1 file changed, 14 insertions(+), 52 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index c443cd64fc9b4..5c636ca7f1a7f 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -69,13 +69,6 @@ struct virtio_blk { /* Process context for config space updates */ struct work_struct config_work; - /* - * Tracks references from block_device_operations open/release and - * virtio_driver probe/remove so this object can be freed once no - * longer in use. 
- */ - refcount_t refs; - /* What host tells us, plus 2 for header & tailer. */ unsigned int sg_elems; @@ -391,43 +384,6 @@ out: return err; } -static void virtblk_get(struct virtio_blk *vblk) -{ - refcount_inc(&vblk->refs); -} - -static void virtblk_put(struct virtio_blk *vblk) -{ - if (refcount_dec_and_test(&vblk->refs)) { - ida_simple_remove(&vd_index_ida, vblk->index); - mutex_destroy(&vblk->vdev_mutex); - kfree(vblk); - } -} - -static int virtblk_open(struct block_device *bd, fmode_t mode) -{ - struct virtio_blk *vblk = bd->bd_disk->private_data; - int ret = 0; - - mutex_lock(&vblk->vdev_mutex); - - if (vblk->vdev) - virtblk_get(vblk); - else - ret = -ENXIO; - - mutex_unlock(&vblk->vdev_mutex); - return ret; -} - -static void virtblk_release(struct gendisk *disk, fmode_t mode) -{ - struct virtio_blk *vblk = disk->private_data; - - virtblk_put(vblk); -} - /* We provide getgeo only to please some old bootloader/partitioning tools */ static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) { @@ -460,11 +416,19 @@ out: return ret; } +static void virtblk_free_disk(struct gendisk *disk) +{ + struct virtio_blk *vblk = disk->private_data; + + ida_simple_remove(&vd_index_ida, vblk->index); + mutex_destroy(&vblk->vdev_mutex); + kfree(vblk); +} + static const struct block_device_operations virtblk_fops = { - .owner = THIS_MODULE, - .open = virtblk_open, - .release = virtblk_release, - .getgeo = virtblk_getgeo, + .owner = THIS_MODULE, + .getgeo = virtblk_getgeo, + .free_disk = virtblk_free_disk, }; static int index_to_minor(int index) @@ -791,8 +755,6 @@ static int virtblk_probe(struct virtio_device *vdev) goto out_free_index; } - /* This reference is dropped in virtblk_remove(). 
*/ - refcount_set(&vblk->refs, 1); mutex_init(&vblk->vdev_mutex); vblk->vdev = vdev; @@ -970,7 +932,7 @@ static void virtblk_remove(struct virtio_device *vdev) flush_work(&vblk->config_work); del_gendisk(vblk->disk); - blk_cleanup_disk(vblk->disk); + blk_cleanup_queue(vblk->disk->queue); blk_mq_free_tag_set(&vblk->tag_set); mutex_lock(&vblk->vdev_mutex); @@ -986,7 +948,7 @@ static void virtblk_remove(struct virtio_device *vdev) mutex_unlock(&vblk->vdev_mutex); - virtblk_put(vblk); + put_disk(vblk->disk); } #ifdef CONFIG_PM_SLEEP -- GitLab From 8f5fea65b06de1cc51d4fc23fb4d378d1abd6ed7 Mon Sep 17 00:00:00 2001 From: David Jeffery Date: Mon, 31 Jan 2022 15:33:37 -0500 Subject: [PATCH 0477/1586] blk-mq: avoid extending delays of active hctx from blk_mq_delay_run_hw_queues When blk_mq_delay_run_hw_queues sets an hctx to run in the future, it can reset the delay length for an already pending delayed work run_work. This creates a scenario where multiple hctx may have their queues set to run, but if one runs first and finds nothing to do, it can reset the delay of another hctx and stall the other hctx's ability to run requests. To avoid this I/O stall when an hctx's run_work is already pending, leave it untouched to run at its current designated time rather than extending its delay. The work will still run which keeps closed the race calling blk_mq_delay_run_hw_queues is needed for while also avoiding the I/O stall. 
Signed-off-by: David Jeffery Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20220131203337.GA17666@redhat Signed-off-by: Jens Axboe --- block/blk-mq.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/block/blk-mq.c b/block/blk-mq.c index 7ca0b47246a61..a05ce77250316 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2179,6 +2179,14 @@ void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs) queue_for_each_hw_ctx(q, hctx, i) { if (blk_mq_hctx_stopped(hctx)) continue; + /* + * If there is already a run_work pending, leave the + * pending delay untouched. Otherwise, a hctx can stall + * if another hctx is re-delaying the other's work + * before the work executes. + */ + if (delayed_work_pending(&hctx->run_work)) + continue; /* * Dispatch from this hctx either if there's no hctx preferred * by IO scheduler or if it has requests that bypass the -- GitLab From bcd2be763252f3a4d5fc4d6008d4d96c601ee74b Mon Sep 17 00:00:00 2001 From: Yahu Gao Date: Fri, 7 Jan 2022 14:58:59 +0800 Subject: [PATCH 0478/1586] block/bfq_wf2q: correct weight to ioprio The return value is ioprio * BFQ_WEIGHT_CONVERSION_COEFF or 0. What we want is ioprio or 0. Correct this by changing the calculation. 
Signed-off-by: Yahu Gao Acked-by: Paolo Valente Link: https://lore.kernel.org/r/20220107065859.25689-1-gaoyahu19@gmail.com Signed-off-by: Jens Axboe --- block/bfq-wf2q.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index b74cc0da118ec..709b901de3ca9 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -519,7 +519,7 @@ unsigned short bfq_ioprio_to_weight(int ioprio) static unsigned short bfq_weight_to_ioprio(int weight) { return max_t(int, 0, - IOPRIO_NR_LEVELS * BFQ_WEIGHT_CONVERSION_COEFF - weight); + IOPRIO_NR_LEVELS - weight / BFQ_WEIGHT_CONVERSION_COEFF); } static void bfq_get_entity(struct bfq_entity *entity) -- GitLab From 869f2c94db92f0f1d6acd0dff1c1ebb8160f5e29 Mon Sep 17 00:00:00 2001 From: Jon Lin Date: Wed, 16 Feb 2022 09:40:25 +0800 Subject: [PATCH 0479/1586] spi: rockchip: Stop spi slave dma receiver when cs inactive SPI controllers whose version is higher than ver 2 will automatically enable this feature. If the length of the master transmission is uncertain, it is better for the RK SPI slave to stop automatically once CS goes inactive instead of waiting for xfer_completion forever.
Signed-off-by: Jon Lin Link: https://lore.kernel.org/r/20220216014028.8123-4-jon.lin@rock-chips.com Signed-off-by: Mark Brown --- drivers/spi/spi-rockchip.c | 81 ++++++++++++++++++++++++++++++++++---- 1 file changed, 73 insertions(+), 8 deletions(-) diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c index c6a1bb09be056..5ecd0692cca1c 100644 --- a/drivers/spi/spi-rockchip.c +++ b/drivers/spi/spi-rockchip.c @@ -133,7 +133,8 @@ #define INT_TF_OVERFLOW (1 << 1) #define INT_RF_UNDERFLOW (1 << 2) #define INT_RF_OVERFLOW (1 << 3) -#define INT_RF_FULL (1 << 4) +#define INT_RF_FULL (1 << 4) +#define INT_CS_INACTIVE (1 << 6) /* Bit fields in ICR, 4bit */ #define ICR_MASK 0x0f @@ -194,6 +195,8 @@ struct rockchip_spi { bool cs_asserted[ROCKCHIP_SPI_MAX_CS_NUM]; bool slave_abort; + bool cs_inactive; /* spi slave tansmition stop when cs inactive */ + struct spi_transfer *xfer; /* Store xfer temporarily */ }; static inline void spi_enable_chip(struct rockchip_spi *rs, bool enable) @@ -343,6 +346,15 @@ static irqreturn_t rockchip_spi_isr(int irq, void *dev_id) struct spi_controller *ctlr = dev_id; struct rockchip_spi *rs = spi_controller_get_devdata(ctlr); + /* When int_cs_inactive comes, spi slave abort */ + if (rs->cs_inactive && readl_relaxed(rs->regs + ROCKCHIP_SPI_IMR) & INT_CS_INACTIVE) { + ctlr->slave_abort(ctlr); + writel_relaxed(0, rs->regs + ROCKCHIP_SPI_IMR); + writel_relaxed(0xffffffff, rs->regs + ROCKCHIP_SPI_ICR); + + return IRQ_HANDLED; + } + if (rs->tx_left) rockchip_spi_pio_writer(rs); @@ -350,6 +362,7 @@ static irqreturn_t rockchip_spi_isr(int irq, void *dev_id) if (!rs->rx_left) { spi_enable_chip(rs, false); writel_relaxed(0, rs->regs + ROCKCHIP_SPI_IMR); + writel_relaxed(0xffffffff, rs->regs + ROCKCHIP_SPI_ICR); spi_finalize_current_transfer(ctlr); } @@ -357,14 +370,18 @@ static irqreturn_t rockchip_spi_isr(int irq, void *dev_id) } static int rockchip_spi_prepare_irq(struct rockchip_spi *rs, - struct spi_transfer *xfer) + struct spi_controller 
*ctlr, + struct spi_transfer *xfer) { rs->tx = xfer->tx_buf; rs->rx = xfer->rx_buf; rs->tx_left = rs->tx ? xfer->len / rs->n_bytes : 0; rs->rx_left = xfer->len / rs->n_bytes; - writel_relaxed(INT_RF_FULL, rs->regs + ROCKCHIP_SPI_IMR); + if (rs->cs_inactive) + writel_relaxed(INT_RF_FULL | INT_CS_INACTIVE, rs->regs + ROCKCHIP_SPI_IMR); + else + writel_relaxed(INT_RF_FULL, rs->regs + ROCKCHIP_SPI_IMR); spi_enable_chip(rs, true); if (rs->tx_left) @@ -383,6 +400,9 @@ static void rockchip_spi_dma_rxcb(void *data) if (state & TXDMA && !rs->slave_abort) return; + if (rs->cs_inactive) + writel_relaxed(0, rs->regs + ROCKCHIP_SPI_IMR); + spi_enable_chip(rs, false); spi_finalize_current_transfer(ctlr); } @@ -423,14 +443,16 @@ static int rockchip_spi_prepare_dma(struct rockchip_spi *rs, atomic_set(&rs->state, 0); + rs->tx = xfer->tx_buf; + rs->rx = xfer->rx_buf; + rxdesc = NULL; if (xfer->rx_buf) { struct dma_slave_config rxconf = { .direction = DMA_DEV_TO_MEM, .src_addr = rs->dma_addr_rx, .src_addr_width = rs->n_bytes, - .src_maxburst = rockchip_spi_calc_burst_size(xfer->len / - rs->n_bytes), + .src_maxburst = rockchip_spi_calc_burst_size(xfer->len / rs->n_bytes), }; dmaengine_slave_config(ctlr->dma_rx, &rxconf); @@ -474,10 +496,13 @@ static int rockchip_spi_prepare_dma(struct rockchip_spi *rs, /* rx must be started before tx due to spi instinct */ if (rxdesc) { atomic_or(RXDMA, &rs->state); - dmaengine_submit(rxdesc); + ctlr->dma_rx->cookie = dmaengine_submit(rxdesc); dma_async_issue_pending(ctlr->dma_rx); } + if (rs->cs_inactive) + writel_relaxed(INT_CS_INACTIVE, rs->regs + ROCKCHIP_SPI_IMR); + spi_enable_chip(rs, true); if (txdesc) { @@ -584,7 +609,42 @@ static size_t rockchip_spi_max_transfer_size(struct spi_device *spi) static int rockchip_spi_slave_abort(struct spi_controller *ctlr) { struct rockchip_spi *rs = spi_controller_get_devdata(ctlr); + u32 rx_fifo_left; + struct dma_tx_state state; + enum dma_status status; + + /* Get current dma rx point */ + if 
(atomic_read(&rs->state) & RXDMA) { + dmaengine_pause(ctlr->dma_rx); + status = dmaengine_tx_status(ctlr->dma_rx, ctlr->dma_rx->cookie, &state); + if (status == DMA_ERROR) { + rs->rx = rs->xfer->rx_buf; + rs->xfer->len = 0; + rx_fifo_left = readl_relaxed(rs->regs + ROCKCHIP_SPI_RXFLR); + for (; rx_fifo_left; rx_fifo_left--) + readl_relaxed(rs->regs + ROCKCHIP_SPI_RXDR); + goto out; + } else { + rs->rx += rs->xfer->len - rs->n_bytes * state.residue; + } + } + /* Get the valid data left in rx fifo and set rs->xfer->len real rx size */ + if (rs->rx) { + rx_fifo_left = readl_relaxed(rs->regs + ROCKCHIP_SPI_RXFLR); + for (; rx_fifo_left; rx_fifo_left--) { + u32 rxw = readl_relaxed(rs->regs + ROCKCHIP_SPI_RXDR); + + if (rs->n_bytes == 1) + *(u8 *)rs->rx = (u8)rxw; + else + *(u16 *)rs->rx = (u16)rxw; + rs->rx += rs->n_bytes; + } + rs->xfer->len = (unsigned int)(rs->rx - rs->xfer->rx_buf); + } + +out: if (atomic_read(&rs->state) & RXDMA) dmaengine_terminate_sync(ctlr->dma_rx); if (atomic_read(&rs->state) & TXDMA) @@ -626,7 +686,7 @@ static int rockchip_spi_transfer_one( } rs->n_bytes = xfer->bits_per_word <= 8 ? 1 : 2; - + rs->xfer = xfer; use_dma = ctlr->can_dma ? 
ctlr->can_dma(ctlr, spi, xfer) : false; ret = rockchip_spi_config(rs, spi, xfer, use_dma, ctlr->slave); @@ -636,7 +696,7 @@ static int rockchip_spi_transfer_one( if (use_dma) return rockchip_spi_prepare_dma(rs, ctlr, xfer); - return rockchip_spi_prepare_irq(rs, xfer); + return rockchip_spi_prepare_irq(rs, ctlr, xfer); } static bool rockchip_spi_can_dma(struct spi_controller *ctlr, @@ -815,8 +875,13 @@ static int rockchip_spi_probe(struct platform_device *pdev) switch (readl_relaxed(rs->regs + ROCKCHIP_SPI_VERSION)) { case ROCKCHIP_SPI_VER2_TYPE2: ctlr->mode_bits |= SPI_CS_HIGH; + if (ctlr->can_dma && slave_mode) + rs->cs_inactive = true; + else + rs->cs_inactive = false; break; default: + rs->cs_inactive = false; break; } -- GitLab From 3a4bf922d42efa4e9a3dc803d1fd786d43e8a501 Mon Sep 17 00:00:00 2001 From: Jon Lin Date: Wed, 16 Feb 2022 09:40:26 +0800 Subject: [PATCH 0480/1586] spi: rockchip: Preset cs-high and clk polarity in setup progress After power up, the cs and clock is in default status, and the cs-high and clock polarity dts property configuration will take no effect until the calling of rockchip_spi_config in the first transmission. So preset them to make sure a correct voltage before the first transmission coming. 
Signed-off-by: Jon Lin Link: https://lore.kernel.org/r/20220216014028.8123-5-jon.lin@rock-chips.com Signed-off-by: Mark Brown --- drivers/spi/spi-rockchip.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c index 5ecd0692cca1c..83da8fdb3c022 100644 --- a/drivers/spi/spi-rockchip.c +++ b/drivers/spi/spi-rockchip.c @@ -713,6 +713,29 @@ static bool rockchip_spi_can_dma(struct spi_controller *ctlr, return xfer->len / bytes_per_word >= rs->fifo_len; } +static int rockchip_spi_setup(struct spi_device *spi) +{ + struct rockchip_spi *rs = spi_controller_get_devdata(spi->controller); + u32 cr0; + + pm_runtime_get_sync(rs->dev); + + cr0 = readl_relaxed(rs->regs + ROCKCHIP_SPI_CTRLR0); + + cr0 &= ~(0x3 << CR0_SCPH_OFFSET); + cr0 |= ((spi->mode & 0x3) << CR0_SCPH_OFFSET); + if (spi->mode & SPI_CS_HIGH && spi->chip_select <= 1) + cr0 |= BIT(spi->chip_select) << CR0_SOI_OFFSET; + else if (spi->chip_select <= 1) + cr0 &= ~(BIT(spi->chip_select) << CR0_SOI_OFFSET); + + writel_relaxed(cr0, rs->regs + ROCKCHIP_SPI_CTRLR0); + + pm_runtime_put(rs->dev); + + return 0; +} + static int rockchip_spi_probe(struct platform_device *pdev) { int ret; @@ -840,6 +863,7 @@ static int rockchip_spi_probe(struct platform_device *pdev) ctlr->min_speed_hz = rs->freq / BAUDR_SCKDV_MAX; ctlr->max_speed_hz = min(rs->freq / BAUDR_SCKDV_MIN, MAX_SCLK_OUT); + ctlr->setup = rockchip_spi_setup; ctlr->set_cs = rockchip_spi_set_cs; ctlr->transfer_one = rockchip_spi_transfer_one; ctlr->max_transfer_size = rockchip_spi_max_transfer_size; -- GitLab From e882575efc771f130a24322377dc1033551da11d Mon Sep 17 00:00:00 2001 From: shengfei Xu Date: Wed, 16 Feb 2022 09:40:27 +0800 Subject: [PATCH 0481/1586] spi: rockchip: Suspend and resume the bus during NOIRQ_SYSTEM_SLEEP_PM ops the wakeup interrupt handler which is guaranteed not to run while @resume noirq() is being executed. 
The patch helps prevent the wakeup source from trying to access the SPI while the SPI is in suspend mode. Signed-off-by: shengfei Xu Signed-off-by: Jon Lin Link: https://lore.kernel.org/r/20220216014028.8123-6-jon.lin@rock-chips.com Signed-off-by: Mark Brown --- drivers/spi/spi-rockchip.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c index 83da8fdb3c022..8b4d56ee21938 100644 --- a/drivers/spi/spi-rockchip.c +++ b/drivers/spi/spi-rockchip.c @@ -964,14 +964,14 @@ static int rockchip_spi_suspend(struct device *dev) { int ret; struct spi_controller *ctlr = dev_get_drvdata(dev); + struct rockchip_spi *rs = spi_controller_get_devdata(ctlr); ret = spi_controller_suspend(ctlr); if (ret < 0) return ret; - ret = pm_runtime_force_suspend(dev); - if (ret < 0) - return ret; + clk_disable_unprepare(rs->spiclk); + clk_disable_unprepare(rs->apb_pclk); pinctrl_pm_select_sleep_state(dev); @@ -986,10 +986,14 @@ static int rockchip_spi_resume(struct device *dev) pinctrl_pm_select_default_state(dev); - ret = pm_runtime_force_resume(dev); + ret = clk_prepare_enable(rs->apb_pclk); if (ret < 0) return ret; + ret = clk_prepare_enable(rs->spiclk); + if (ret < 0) + clk_disable_unprepare(rs->apb_pclk); + ret = spi_controller_resume(ctlr); if (ret < 0) { clk_disable_unprepare(rs->spiclk); @@ -1031,7 +1035,7 @@ static int rockchip_spi_runtime_resume(struct device *dev) #endif /* CONFIG_PM */ static const struct dev_pm_ops rockchip_spi_pm = { - SET_SYSTEM_SLEEP_PM_OPS(rockchip_spi_suspend, rockchip_spi_resume) + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(rockchip_spi_suspend, rockchip_spi_resume) SET_RUNTIME_PM_OPS(rockchip_spi_runtime_suspend, rockchip_spi_runtime_resume, NULL) }; -- GitLab From 2fcdde56c44fe1cd13ce328128f509bbda2cdb41 Mon Sep 17 00:00:00 2001 From: Jon Lin Date: Wed, 16 Feb 2022 09:40:28 +0800 Subject: [PATCH 0482/1586] spi: rockchip: clear interrupt status in error handler The interrupt status bit of the
previous error data transmission will affect the next operation and cause continuous SPI transmission failure. Signed-off-by: Jon Lin Link: https://lore.kernel.org/r/20220216014028.8123-7-jon.lin@rock-chips.com Signed-off-by: Mark Brown --- drivers/spi/spi-rockchip.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c index 8b4d56ee21938..cdc16eecaf6b5 100644 --- a/drivers/spi/spi-rockchip.c +++ b/drivers/spi/spi-rockchip.c @@ -278,8 +278,9 @@ static void rockchip_spi_handle_err(struct spi_controller *ctlr, */ spi_enable_chip(rs, false); - /* make sure all interrupts are masked */ + /* make sure all interrupts are masked and status cleared */ writel_relaxed(0, rs->regs + ROCKCHIP_SPI_IMR); + writel_relaxed(0xffffffff, rs->regs + ROCKCHIP_SPI_ICR); if (atomic_read(&rs->state) & TXDMA) dmaengine_terminate_async(ctlr->dma_tx); -- GitLab From 7fd786dfbd2c55ddee3b87f33c82f1c58bdb1dd6 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 18 Jan 2022 15:02:57 -0800 Subject: [PATCH 0483/1586] tools/power/x86/intel-speed-select: OOB daemon mode It is possible that some out of band agent changed config level. In this case CPUs need to be online/offline to support this config change. Add a command line option --oob, so that this tool can run as daemon and poll for config level change and take action. The poll interval is configurable in seconds using config option --poll-interval.
Signed-off-by: Srinivas Pandruvada --- tools/power/x86/intel-speed-select/Build | 2 +- .../x86/intel-speed-select/isst-config.c | 51 +++- .../x86/intel-speed-select/isst-daemon.c | 239 ++++++++++++++++++ tools/power/x86/intel-speed-select/isst.h | 11 + 4 files changed, 292 insertions(+), 11 deletions(-) create mode 100644 tools/power/x86/intel-speed-select/isst-daemon.c diff --git a/tools/power/x86/intel-speed-select/Build b/tools/power/x86/intel-speed-select/Build index b61456d75190f..86fb9020cca2b 100644 --- a/tools/power/x86/intel-speed-select/Build +++ b/tools/power/x86/intel-speed-select/Build @@ -1 +1 @@ -intel-speed-select-y += isst-config.o isst-core.o isst-display.o +intel-speed-select-y += isst-config.o isst-core.o isst-display.o isst-daemon.o diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index efe72fa48224a..de5029b54050d 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -368,7 +368,7 @@ int get_topo_max_cpus(void) return topo_max_cpus; } -static void set_cpu_online_offline(int cpu, int state) +void set_cpu_online_offline(int cpu, int state) { char buffer[128]; int fd, ret; @@ -409,12 +409,10 @@ static void force_all_cpus_online(void) unlink("/var/run/isst_cpu_topology.dat"); } -#define MAX_PACKAGE_COUNT 8 -#define MAX_DIE_PER_PACKAGE 2 -static void for_each_online_package_in_set(void (*callback)(int, void *, void *, - void *, void *), - void *arg1, void *arg2, void *arg3, - void *arg4) +void for_each_online_package_in_set(void (*callback)(int, void *, void *, + void *, void *), + void *arg1, void *arg2, void *arg3, + void *arg4) { int max_packages[MAX_PACKAGE_COUNT * MAX_PACKAGE_COUNT]; int pkg_index = 0, i; @@ -2803,7 +2801,9 @@ static void usage(void) printf("\t[-p|--pause] : Delay between two mail box commands in milliseconds\n"); printf("\t[-r|--retry] : Retry count for mail box commands on failure, default 3\n"); 
printf("\t[-v|--version] : Print version\n"); - + printf("\t[-b|--oob : Start a daemon to process HFI events for perf profile change from Out of Band agent.\n"); + printf("\t[-n|--no-daemon : Don't run as daemon. By default --oob will turn on daemon mode\n"); + printf("\t[-w|--delay : Delay for reading config level state change in OOB poll mode.\n"); printf("\nResult format\n"); printf("\tResult display uses a common format for each command:\n"); printf("\tResults are formatted in text/JSON with\n"); @@ -2837,6 +2837,9 @@ static void cmdline(int argc, char **argv) int opt, force_cpus_online = 0; int option_index = 0; int ret; + int oob_mode = 0; + int poll_interval = -1; + int no_daemon = 0; static struct option long_options[] = { { "all-cpus-online", no_argument, 0, 'a' }, @@ -2849,6 +2852,9 @@ static void cmdline(int argc, char **argv) { "out", required_argument, 0, 'o' }, { "retry", required_argument, 0, 'r' }, { "version", no_argument, 0, 'v' }, + { "oob", no_argument, 0, 'b' }, + { "no-daemon", no_argument, 0, 'n' }, + { "poll-interval", required_argument, 0, 'w' }, { 0, 0, 0, 0 } }; @@ -2875,7 +2881,7 @@ static void cmdline(int argc, char **argv) } progname = argv[0]; - while ((opt = getopt_long_only(argc, argv, "+c:df:hio:va", long_options, + while ((opt = getopt_long_only(argc, argv, "+c:df:hio:vabw:n", long_options, &option_index)) != -1) { switch (opt) { case 'a': @@ -2920,12 +2926,26 @@ static void cmdline(int argc, char **argv) case 'v': print_version(); break; + case 'b': + oob_mode = 1; + break; + case 'n': + no_daemon = 1; + break; + case 'w': + ret = strtol(optarg, &ptr, 10); + if (!ret) { + fprintf(stderr, "Invalid poll interval count\n"); + exit(0); + } + poll_interval = ret; + break; default: usage(); } } - if (optind > (argc - 2)) { + if (optind > (argc - 2) && !oob_mode) { usage(); exit(0); } @@ -2936,6 +2956,17 @@ static void cmdline(int argc, char **argv) set_cpu_present_cpu_mask(); set_cpu_target_cpu_mask(); + if (oob_mode) { + 
create_cpu_map(); + if (debug_flag) + fprintf(stderr, "OOB mode is enabled in debug mode\n"); + + ret = isst_daemon(debug_flag, poll_interval, no_daemon); + if (ret) + fprintf(stderr, "OOB mode enable failed\n"); + goto out; + } + if (!is_clx_n_platform()) { ret = isst_fill_platform_info(); if (ret) diff --git a/tools/power/x86/intel-speed-select/isst-daemon.c b/tools/power/x86/intel-speed-select/isst-daemon.c new file mode 100644 index 0000000000000..15a70bba8d760 --- /dev/null +++ b/tools/power/x86/intel-speed-select/isst-daemon.c @@ -0,0 +1,239 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel Speed Select -- Allow speed select to daemonize + * Copyright (c) 2022 Intel Corporation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "isst.h" + +static int per_package_levels_info[MAX_PACKAGE_COUNT][MAX_DIE_PER_PACKAGE]; +static time_t per_package_levels_tm[MAX_PACKAGE_COUNT][MAX_DIE_PER_PACKAGE]; + +static void init_levels(void) +{ + int i, j; + + for (i = 0; i < MAX_PACKAGE_COUNT; ++i) + for (j = 0; j < MAX_DIE_PER_PACKAGE; ++j) + per_package_levels_info[i][j] = -1; +} + +void process_level_change(int cpu) +{ + struct isst_pkg_ctdp_level_info ctdp_level; + int pkg_id = get_physical_package_id(cpu); + int die_id = get_physical_die_id(cpu); + struct isst_pkg_ctdp pkg_dev; + time_t tm; + int ret; + + if (pkg_id >= MAX_PACKAGE_COUNT || die_id > MAX_DIE_PER_PACKAGE) { + debug_printf("Invalid package/die info for cpu:%d\n", cpu); + return; + } + + tm = time(NULL); + if (tm - per_package_levels_tm[pkg_id][die_id] < 2 ) + return; + + per_package_levels_tm[pkg_id][die_id] = tm; + + ret = isst_get_ctdp_levels(cpu, &pkg_dev); + if (ret) { + debug_printf("Can't get tdp levels for cpu:%d\n", cpu); + return; + } + + debug_printf("Get Config level %d pkg:%d die:%d current_level:%d \n", cpu, + pkg_id, die_id, pkg_dev.current_level); + + if (pkg_dev.locked) { + 
debug_printf("config TDP s locked \n"); + return; + } + + if (per_package_levels_info[pkg_id][die_id] == pkg_dev.current_level) + return; + + debug_printf("**Config level change for cpu:%d pkg:%d die:%d from %d to %d\n", + cpu, pkg_id, die_id, per_package_levels_info[pkg_id][die_id], + pkg_dev.current_level); + + per_package_levels_info[pkg_id][die_id] = pkg_dev.current_level; + + ctdp_level.core_cpumask_size = + alloc_cpu_set(&ctdp_level.core_cpumask); + ret = isst_get_coremask_info(cpu, pkg_dev.current_level, &ctdp_level); + if (ret) { + free_cpu_set(ctdp_level.core_cpumask); + debug_printf("Can't get core_mask:%d\n", cpu); + return; + } + + if (ctdp_level.cpu_count) { + int i, max_cpus = get_topo_max_cpus(); + for (i = 0; i < max_cpus; ++i) { + if (pkg_id != get_physical_package_id(i) || die_id != get_physical_die_id(i)) + continue; + if (CPU_ISSET_S(i, ctdp_level.core_cpumask_size, ctdp_level.core_cpumask)) { + fprintf(stderr, "online cpu %d\n", i); + set_cpu_online_offline(i, 1); + } else { + fprintf(stderr, "offline cpu %d\n", i); + set_cpu_online_offline(i, 0); + } + } + } + + free_cpu_set(ctdp_level.core_cpumask); +} + +static void _poll_for_config_change(int cpu, void *arg1, void *arg2, + void *arg3, void *arg4) +{ + process_level_change(cpu); +} + +static void poll_for_config_change(void) +{ + for_each_online_package_in_set(_poll_for_config_change, NULL, NULL, + NULL, NULL); +} + +static int done = 0; +static int pid_file_handle; + +static void signal_handler(int sig) +{ + switch (sig) { + case SIGINT: + case SIGTERM: + done = 1; + exit(0); + break; + default: + break; + } +} + +static void daemonize(char *rundir, char *pidfile) +{ + int pid, sid, i; + char str[10]; + struct sigaction sig_actions; + sigset_t sig_set; + int ret; + + if (getppid() == 1) + return; + + sigemptyset(&sig_set); + sigaddset(&sig_set, SIGCHLD); + sigaddset(&sig_set, SIGTSTP); + sigaddset(&sig_set, SIGTTOU); + sigaddset(&sig_set, SIGTTIN); + sigprocmask(SIG_BLOCK, &sig_set, NULL); 
+ + sig_actions.sa_handler = signal_handler; + sigemptyset(&sig_actions.sa_mask); + sig_actions.sa_flags = 0; + + sigaction(SIGHUP, &sig_actions, NULL); + sigaction(SIGTERM, &sig_actions, NULL); + sigaction(SIGINT, &sig_actions, NULL); + + pid = fork(); + if (pid < 0) { + /* Could not fork */ + exit(EXIT_FAILURE); + } + if (pid > 0) + exit(EXIT_SUCCESS); + + umask(027); + + sid = setsid(); + if (sid < 0) + exit(EXIT_FAILURE); + + /* close all descriptors */ + for (i = getdtablesize(); i >= 0; --i) + close(i); + + i = open("/dev/null", O_RDWR); + ret = dup(i); + if (ret == -1) + exit(EXIT_FAILURE); + + ret = dup(i); + if (ret == -1) + exit(EXIT_FAILURE); + + ret = chdir(rundir); + if (ret == -1) + exit(EXIT_FAILURE); + + pid_file_handle = open(pidfile, O_RDWR | O_CREAT, 0600); + if (pid_file_handle == -1) { + /* Couldn't open lock file */ + exit(1); + } + /* Try to lock file */ +#ifdef LOCKF_SUPPORT + if (lockf(pid_file_handle, F_TLOCK, 0) == -1) { +#else + if (flock(pid_file_handle, LOCK_EX|LOCK_NB) < 0) { +#endif + /* Couldn't get lock on lock file */ + fprintf(stderr, "Couldn't get lock file %d\n", getpid()); + exit(1); + } + snprintf(str, sizeof(str), "%d\n", getpid()); + ret = write(pid_file_handle, str, strlen(str)); + if (ret == -1) + exit(EXIT_FAILURE); + + close(i); +} + +int isst_daemon(int debug_mode, int poll_interval, int no_daemon) +{ + int ret; + + if (!no_daemon && poll_interval < 0 && !debug_mode) { + fprintf(stderr, "OOB mode is enabled and will run as daemon\n"); + daemonize((char *) "/tmp/", + (char *)"/tmp/hfi-events.pid"); + } else { + signal(SIGINT, signal_handler); + } + + init_levels(); + + if (poll_interval < 0) { + fprintf(stderr, "Must specify poll-interval\n"); + return ret; + } + + debug_printf("Starting loop\n"); + while (!done) { + sleep(poll_interval); + poll_for_config_change(); + } + + return 0; +} diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h index 1aa15d5ea57ce..b33f2c68d2ce0 
100644 --- a/tools/power/x86/intel-speed-select/isst.h +++ b/tools/power/x86/intel-speed-select/isst.h @@ -76,6 +76,9 @@ #define DISP_FREQ_MULTIPLIER 100 +#define MAX_PACKAGE_COUNT 8 +#define MAX_DIE_PER_PACKAGE 2 + struct isst_clos_config { int pkg_id; int die_id; @@ -260,4 +263,12 @@ extern int is_skx_based_platform(void); extern int is_spr_platform(void); extern int is_icx_platform(void); extern void isst_trl_display_information(int cpu, FILE *outf, unsigned long long trl); + +extern void set_cpu_online_offline(int cpu, int state); +extern void for_each_online_package_in_set(void (*callback)(int, void *, void *, + void *, void *), + void *arg1, void *arg2, void *arg3, + void *arg4); +extern int isst_daemon(int debug_mode, int poll_interval, int no_daemon); +extern void process_level_change(int cpu); #endif -- GitLab From 7d440da009b6cd2a559cdb63d97e2cb569357dbc Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 18 Jan 2022 16:44:20 -0800 Subject: [PATCH 0484/1586] tools/power/x86/intel-speed-select: HFI support Read HFI (Hardware Feedback Interface) events to process config level changes in oob mode. When HFI is supported there is no need for polling to check config level change. Subscribe to Linux thermal netlink messages and process message: THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE. This message contains cpu number, performance and energy efficiency. 
Signed-off-by: Srinivas Pandruvada --- tools/power/x86/intel-speed-select/Build | 2 +- tools/power/x86/intel-speed-select/Makefile | 10 +- .../power/x86/intel-speed-select/hfi-events.c | 309 ++++++++++++++++++ .../x86/intel-speed-select/isst-daemon.c | 5 + tools/power/x86/intel-speed-select/isst.h | 2 + 5 files changed, 324 insertions(+), 4 deletions(-) create mode 100644 tools/power/x86/intel-speed-select/hfi-events.c diff --git a/tools/power/x86/intel-speed-select/Build b/tools/power/x86/intel-speed-select/Build index 86fb9020cca2b..81e36bd578b1c 100644 --- a/tools/power/x86/intel-speed-select/Build +++ b/tools/power/x86/intel-speed-select/Build @@ -1 +1 @@ -intel-speed-select-y += isst-config.o isst-core.o isst-display.o isst-daemon.o +intel-speed-select-y += isst-config.o isst-core.o isst-display.o isst-daemon.o hfi-events.o diff --git a/tools/power/x86/intel-speed-select/Makefile b/tools/power/x86/intel-speed-select/Makefile index 12c6939dca2a4..d2fba1297d964 100644 --- a/tools/power/x86/intel-speed-select/Makefile +++ b/tools/power/x86/intel-speed-select/Makefile @@ -13,8 +13,8 @@ endif # Do not use make's built-in rules # (this improves performance and avoids hard-to-debug behaviour); MAKEFLAGS += -r - -override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include +override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include -I/usr/include/libnl3 +override LDFLAGS += -lnl-genl-3 -lnl-3 ALL_TARGETS := intel-speed-select ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS)) @@ -31,7 +31,11 @@ $(OUTPUT)include/linux/isst_if.h: ../../../../include/uapi/linux/isst_if.h mkdir -p $(OUTPUT)include/linux 2>&1 || true ln -sf $(CURDIR)/../../../../include/uapi/linux/isst_if.h $@ -prepare: $(OUTPUT)include/linux/isst_if.h +$(OUTPUT)include/linux/thermal.h: ../../../../include/uapi/linux/thermal.h + mkdir -p $(OUTPUT)include/linux 2>&1 || true + ln -sf $(CURDIR)/../../../../include/uapi/linux/thermal.h $@ + +prepare: $(OUTPUT)include/linux/isst_if.h 
$(OUTPUT)include/linux/thermal.h ISST_IN := $(OUTPUT)intel-speed-select-in.o diff --git a/tools/power/x86/intel-speed-select/hfi-events.c b/tools/power/x86/intel-speed-select/hfi-events.c new file mode 100644 index 0000000000000..e856767113725 --- /dev/null +++ b/tools/power/x86/intel-speed-select/hfi-events.c @@ -0,0 +1,309 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel Speed Select -- Read HFI events for OOB + * Copyright (c) 2022 Intel Corporation. + */ + +/* + * This file incorporates work covered by the following copyright and + * permission notice: + + * WPA Supplicant - driver interaction with Linux nl80211/cfg80211 + * Copyright (c) 2003-2008, Jouni Malinen + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Alternatively, this software may be distributed under the terms of + * BSD license. + * + * Requires + * libnl-genl-3-dev + * + * For Fedora/CenOS + * dnf install libnl3-devel + * For Ubuntu + * apt install libnl-3-dev libnl-genl-3-dev + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "isst.h" + +struct hfi_event_data { + struct nl_sock *nl_handle; + struct nl_cb *nl_cb; +}; + +struct hfi_event_data drv; + +static int ack_handler(struct nl_msg *msg, void *arg) +{ + int *err = arg; + *err = 0; + return NL_STOP; +} + +static int finish_handler(struct nl_msg *msg, void *arg) +{ + int *ret = arg; + *ret = 0; + return NL_SKIP; +} + +static int error_handler(struct sockaddr_nl *nla, struct nlmsgerr *err, + void *arg) +{ + int *ret = arg; + *ret = err->error; + return NL_SKIP; +} + +static int seq_check_handler(struct nl_msg *msg, void *arg) +{ + return NL_OK; +} + +static int send_and_recv_msgs(struct hfi_event_data *drv, + struct nl_msg *msg, + int (*valid_handler)(struct 
nl_msg *, void *), + void *valid_data) +{ + struct nl_cb *cb; + int err = -ENOMEM; + + cb = nl_cb_clone(drv->nl_cb); + if (!cb) + goto out; + + err = nl_send_auto_complete(drv->nl_handle, msg); + if (err < 0) + goto out; + + err = 1; + + nl_cb_err(cb, NL_CB_CUSTOM, error_handler, &err); + nl_cb_set(cb, NL_CB_FINISH, NL_CB_CUSTOM, finish_handler, &err); + nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM, ack_handler, &err); + + if (valid_handler) + nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, + valid_handler, valid_data); + + while (err > 0) + nl_recvmsgs(drv->nl_handle, cb); + out: + nl_cb_put(cb); + nlmsg_free(msg); + return err; +} + +struct family_data { + const char *group; + int id; +}; + +static int family_handler(struct nl_msg *msg, void *arg) +{ + struct family_data *res = arg; + struct nlattr *tb[CTRL_ATTR_MAX + 1]; + struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); + struct nlattr *mcgrp; + int i; + + nla_parse(tb, CTRL_ATTR_MAX, genlmsg_attrdata(gnlh, 0), + genlmsg_attrlen(gnlh, 0), NULL); + if (!tb[CTRL_ATTR_MCAST_GROUPS]) + return NL_SKIP; + + nla_for_each_nested(mcgrp, tb[CTRL_ATTR_MCAST_GROUPS], i) { + struct nlattr *tb2[CTRL_ATTR_MCAST_GRP_MAX + 1]; + nla_parse(tb2, CTRL_ATTR_MCAST_GRP_MAX, nla_data(mcgrp), + nla_len(mcgrp), NULL); + if (!tb2[CTRL_ATTR_MCAST_GRP_NAME] || + !tb2[CTRL_ATTR_MCAST_GRP_ID] || + strncmp(nla_data(tb2[CTRL_ATTR_MCAST_GRP_NAME]), + res->group, + nla_len(tb2[CTRL_ATTR_MCAST_GRP_NAME])) != 0) + continue; + res->id = nla_get_u32(tb2[CTRL_ATTR_MCAST_GRP_ID]); + break; + }; + + return 0; +} + +static int nl_get_multicast_id(struct hfi_event_data *drv, + const char *family, const char *group) +{ + struct nl_msg *msg; + int ret = -1; + struct family_data res = { group, -ENOENT }; + + msg = nlmsg_alloc(); + if (!msg) + return -ENOMEM; + genlmsg_put(msg, 0, 0, genl_ctrl_resolve(drv->nl_handle, "nlctrl"), + 0, 0, CTRL_CMD_GETFAMILY, 0); + NLA_PUT_STRING(msg, CTRL_ATTR_FAMILY_NAME, family); + + ret = send_and_recv_msgs(drv, msg, family_handler, 
&res); + msg = NULL; + if (ret == 0) + ret = res.id; + +nla_put_failure: + nlmsg_free(msg); + return ret; +} + +struct perf_cap { + int cpu; + int perf; + int eff; +}; + +static void process_hfi_event(struct perf_cap *perf_cap) +{ + process_level_change(perf_cap->cpu); +} + +static int handle_event(struct nl_msg *n, void *arg) +{ + struct nlmsghdr *nlh = nlmsg_hdr(n); + struct genlmsghdr *genlhdr = genlmsg_hdr(nlh); + struct nlattr *attrs[THERMAL_GENL_ATTR_MAX + 1]; + int ret; + struct perf_cap perf_cap; + + ret = genlmsg_parse(nlh, 0, attrs, THERMAL_GENL_ATTR_MAX, NULL); + + debug_printf("Received event %d parse_rer:%d\n", genlhdr->cmd, ret); + if (genlhdr->cmd == THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE) { + struct nlattr *cap; + int j, index = 0; + + debug_printf("THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE\n"); + nla_for_each_nested(cap, attrs[THERMAL_GENL_ATTR_CPU_CAPABILITY], j) { + switch (index) { + case 0: + perf_cap.cpu = nla_get_u32(cap); + break; + case 1: + perf_cap.perf = nla_get_u32(cap); + break; + case 2: + perf_cap.eff = nla_get_u32(cap); + break; + default: + break; + } + ++index; + if (index == 3) { + index = 0; + process_hfi_event(&perf_cap); + } + } + } + + return 0; +} + +static int _hfi_exit; + +static int check_hf_suport(void) +{ + unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0; + + __cpuid(6, eax, ebx, ecx, edx); + if (eax & BIT(19)) + return 1; + + return 0; +} + +int hfi_main(void) +{ + struct nl_sock *sock; + struct nl_cb *cb; + int err = 0; + int mcast_id; + int no_block = 0; + + if (!check_hf_suport()) { + fprintf(stderr, "CPU Doesn't support HFI\n"); + return -1; + } + + sock = nl_socket_alloc(); + if (!sock) { + fprintf(stderr, "nl_socket_alloc failed\n"); + return -1; + } + + if (genl_connect(sock)) { + fprintf(stderr, "genl_connect(sk_event) failed\n"); + goto free_sock; + } + + drv.nl_handle = sock; + drv.nl_cb = cb = nl_cb_alloc(NL_CB_DEFAULT); + if (drv.nl_cb == NULL) { + printf("Failed to allocate netlink callbacks"); + goto 
free_sock; + } + + mcast_id = nl_get_multicast_id(&drv, THERMAL_GENL_FAMILY_NAME, + THERMAL_GENL_EVENT_GROUP_NAME); + if (mcast_id < 0) { + fprintf(stderr, "nl_get_multicast_id failed\n"); + goto free_sock; + } + + if (nl_socket_add_membership(sock, mcast_id)) { + fprintf(stderr, "nl_socket_add_membership failed"); + goto free_sock; + } + + nl_cb_set(cb, NL_CB_SEQ_CHECK, NL_CB_CUSTOM, seq_check_handler, 0); + nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, handle_event, NULL); + + if (no_block) + nl_socket_set_nonblocking(sock); + + debug_printf("hfi is initialized\n"); + + while (!_hfi_exit && !err) { + err = nl_recvmsgs(sock, cb); + debug_printf("nl_recv_message err:%d\n", err); + } + + return 0; + + /* Netlink library doesn't have calls to dealloc cb or disconnect */ +free_sock: + nl_socket_free(sock); + + return -1; +} + +void hfi_exit(void) +{ + _hfi_exit = 1; +} diff --git a/tools/power/x86/intel-speed-select/isst-daemon.c b/tools/power/x86/intel-speed-select/isst-daemon.c index 15a70bba8d760..dd372924bc826 100644 --- a/tools/power/x86/intel-speed-select/isst-daemon.c +++ b/tools/power/x86/intel-speed-select/isst-daemon.c @@ -123,6 +123,7 @@ static void signal_handler(int sig) case SIGINT: case SIGTERM: done = 1; + hfi_exit(); exit(0); break; default: @@ -225,6 +226,10 @@ int isst_daemon(int debug_mode, int poll_interval, int no_daemon) init_levels(); if (poll_interval < 0) { + ret = hfi_main(); + if (ret) { + fprintf(stderr, "HFI initialization failed\n"); + } fprintf(stderr, "Must specify poll-interval\n"); return ret; } diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h index b33f2c68d2ce0..0796d8c6a8827 100644 --- a/tools/power/x86/intel-speed-select/isst.h +++ b/tools/power/x86/intel-speed-select/isst.h @@ -271,4 +271,6 @@ extern void for_each_online_package_in_set(void (*callback)(int, void *, void *, void *arg4); extern int isst_daemon(int debug_mode, int poll_interval, int no_daemon); extern void 
process_level_change(int cpu); +extern int hfi_main(void); +extern void hfi_exit(void); #endif -- GitLab From f3874e96fad596cd2c2441802be7e1d0c9fa8677 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 18 Jan 2022 16:52:26 -0800 Subject: [PATCH 0485/1586] tools/power/x86/intel-speed-select: v1.12 release This version allows out of band SST support, where some remote agent changes SST profiles via some Board Management Controller. Signed-off-by: Srinivas Pandruvada --- tools/power/x86/intel-speed-select/isst-config.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index de5029b54050d..060390e88e374 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -15,7 +15,8 @@ struct process_cmd_struct { int arg; }; -static const char *version_str = "v1.11"; +static const char *version_str = "v1.12"; + static const int supported_api_ver = 1; static struct isst_if_platform_info isst_platform_info; static char *progname; -- GitLab From b06e15ebd5bfb670f93c7f11a29b8299c1178bc6 Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Mon, 14 Feb 2022 23:41:08 +0500 Subject: [PATCH 0486/1586] selftests/x86: Add validity check and allow field splitting Add check to test if CC has a string. CC can have multiple sub-strings like "ccache gcc". Error pops up if it is treated as single string and double quotes are used around it. This can be fixed by removing the quotes and not treating CC as a single string. 
Fixes: e9886ace222e ("selftests, x86: Rework x86 target architecture detection") Reported-by: "kernelci.org bot" Signed-off-by: Muhammad Usama Anjum Signed-off-by: Dave Hansen Link: https://lkml.kernel.org/r/20220214184109.3739179-2-usama.anjum@collabora.com --- tools/testing/selftests/x86/check_cc.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/x86/check_cc.sh b/tools/testing/selftests/x86/check_cc.sh index 3e2089c8cf549..8c669c0d662ee 100755 --- a/tools/testing/selftests/x86/check_cc.sh +++ b/tools/testing/selftests/x86/check_cc.sh @@ -7,7 +7,7 @@ CC="$1" TESTPROG="$2" shift 2 -if "$CC" -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then +if [ -n "$CC" ] && $CC -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then echo 1 else echo 0 -- GitLab From 6170abb21e2380477080b25145da9747ad467d3d Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Mon, 14 Feb 2022 23:41:09 +0500 Subject: [PATCH 0487/1586] selftests/sgx: Treat CC as one argument CC can have multiple sub-strings like "ccache gcc". For check_cc.sh, CC needs to be treated like one argument. Put double quotes around it to make CC one string and hence one argument. 
Fixes: 2adcba79e69d ("selftests/x86: Add a selftest for SGX") Reported-by: "kernelci.org bot" Signed-off-by: Muhammad Usama Anjum Signed-off-by: Dave Hansen Link: https://lkml.kernel.org/r/20220214184109.3739179-3-usama.anjum@collabora.com --- tools/testing/selftests/sgx/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/sgx/Makefile b/tools/testing/selftests/sgx/Makefile index 2956584e1e37f..75af864e07b65 100644 --- a/tools/testing/selftests/sgx/Makefile +++ b/tools/testing/selftests/sgx/Makefile @@ -4,7 +4,7 @@ include ../lib.mk .PHONY: all clean -CAN_BUILD_X86_64 := $(shell ../x86/check_cc.sh $(CC) \ +CAN_BUILD_X86_64 := $(shell ../x86/check_cc.sh "$(CC)" \ ../x86/trivial_64bit_program.c) ifndef OBJCOPY -- GitLab From 4ba31cdd88c9008777a48d3ac2b045dce5634389 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Wed, 9 Feb 2022 10:21:58 +0000 Subject: [PATCH 0488/1586] crypto: cavium/nitrox - fix typo on crypto crypto had a typo, fix it. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/cavium/nitrox/nitrox_req.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/cavium/nitrox/nitrox_req.h b/drivers/crypto/cavium/nitrox/nitrox_req.h index ed174883c8e30..6bf088bcdd117 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_req.h +++ b/drivers/crypto/cavium/nitrox/nitrox_req.h @@ -440,7 +440,7 @@ struct aqmq_command_s { /** * struct ctx_hdr - Book keeping data about the crypto context * @pool: Pool used to allocate crypto context - * @dma: Base DMA address of the cypto context + * @dma: Base DMA address of the crypto context * @ctx_dma: Actual usable crypto context for NITROX */ struct ctx_hdr { -- GitLab From fffe799b6cc980f967df2f3773dc53639dd68d7e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 9 Feb 2022 20:00:47 +0200 Subject: [PATCH 0489/1586] crypto: qat - don't cast parameter in bit operations While in this particular case it would not be a (critical) 
issue, the pattern itself is bad and error prone in case the location of the parameter is changed. Don't cast parameter to unsigned long pointer in the bit operations. Instead copy to a local variable on stack of a proper type and use. Fixes: b4b7e67c917f ("crypto: qat - Intel(R) QAT ucode part of fw loader") Signed-off-by: Andy Shevchenko Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_uclo.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/qat/qat_common/qat_uclo.c b/drivers/crypto/qat/qat_common/qat_uclo.c index 2026cc6be8f0e..6356402a2c9e8 100644 --- a/drivers/crypto/qat/qat_common/qat_uclo.c +++ b/drivers/crypto/qat/qat_common/qat_uclo.c @@ -387,7 +387,9 @@ static int qat_uclo_init_ustore(struct icp_qat_fw_loader_handle *handle, page = image->page; for_each_set_bit(ae, &ae_mask, handle->hal_handle->ae_max_num) { - if (!test_bit(ae, (unsigned long *)&uof_image->ae_assigned)) + unsigned long ae_assigned = uof_image->ae_assigned; + + if (!test_bit(ae, &ae_assigned)) continue; if (!test_bit(ae, &cfg_ae_mask)) @@ -664,8 +666,9 @@ static int qat_uclo_map_ae(struct icp_qat_fw_loader_handle *handle, int max_ae) continue; for (i = 0; i < obj_handle->uimage_num; i++) { - if (!test_bit(ae, (unsigned long *) - &obj_handle->ae_uimage[i].img_ptr->ae_assigned)) + unsigned long ae_assigned = obj_handle->ae_uimage[i].img_ptr->ae_assigned; + + if (!test_bit(ae, &ae_assigned)) continue; mflag = 1; if (qat_uclo_init_ae_data(obj_handle, ae, i)) -- GitLab From dfe085d8dcd0bb1fe20cc2327e81c8064cead441 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 10 Feb 2022 13:09:40 +1100 Subject: [PATCH 0490/1586] crypto: xts - Add softdep on ecb The xts module needs ecb to be present as it's meant to work on top of ecb. This patch adds a softdep so ecb can be included automatically into the initramfs. 
Reported-by: rftc Signed-off-by: Herbert Xu --- crypto/xts.c | 1 + 1 file changed, 1 insertion(+) diff --git a/crypto/xts.c b/crypto/xts.c index 6c12f30dbdd6d..63c85b9e64e08 100644 --- a/crypto/xts.c +++ b/crypto/xts.c @@ -466,3 +466,4 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("XTS block cipher mode"); MODULE_ALIAS_CRYPTO("xts"); MODULE_IMPORT_NS(CRYPTO_INTERNAL); +MODULE_SOFTDEP("pre: ecb"); -- GitLab From f60bbbbe8039e59341055e827bb404c14a2688a0 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 10 Feb 2022 13:31:13 +1100 Subject: [PATCH 0491/1586] crypto: lrw - Add dependency on ecb The lrw template relies on ecb to work. So we need to declare a Kconfig dependency as well as a module softdep on it. Signed-off-by: Herbert Xu --- crypto/Kconfig | 1 + crypto/lrw.c | 1 + 2 files changed, 2 insertions(+) diff --git a/crypto/Kconfig b/crypto/Kconfig index fa1741bb568f9..d9573b3f081ff 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -425,6 +425,7 @@ config CRYPTO_LRW select CRYPTO_SKCIPHER select CRYPTO_MANAGER select CRYPTO_GF128MUL + select CRYPTO_ECB help LRW: Liskov Rivest Wagner, a tweakable, non malleable, non movable narrow block cipher mode for dm-crypt. Use it with cipher diff --git a/crypto/lrw.c b/crypto/lrw.c index bcf09fbc750af..8d59a66b65255 100644 --- a/crypto/lrw.c +++ b/crypto/lrw.c @@ -428,3 +428,4 @@ module_exit(lrw_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("LRW block cipher mode"); MODULE_ALIAS_CRYPTO("lrw"); +MODULE_SOFTDEP("pre: ecb"); -- GitLab From 605b84ae0beb8eef078b3d55e548b1dd6e75aeb1 Mon Sep 17 00:00:00 2001 From: Wojciech Ziemba Date: Thu, 10 Feb 2022 13:38:25 +0000 Subject: [PATCH 0492/1586] crypto: qat - add misc workqueue In an effort to reduce the amount of workqueues, scattered across the QAT driver, introduce the misc workqueue. This queue will be used to handle bottom halves, Power Management and more in the future. The function adf_misc_wq_queue_work() has been added to simplify the enqueuing of jobs. 
Signed-off-by: Wojciech Ziemba Reviewed-by: Giovanni Cabiddu Reviewed-by: Marco Chiappero Signed-off-by: Herbert Xu --- .../crypto/qat/qat_common/adf_common_drv.h | 3 ++ drivers/crypto/qat/qat_common/adf_ctl_drv.c | 6 ++++ drivers/crypto/qat/qat_common/adf_isr.c | 28 +++++++++++++++++++ 3 files changed, 37 insertions(+) diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h index 76f4f96ec5eb0..0775491768797 100644 --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/qat/qat_common/adf_common_drv.h @@ -188,6 +188,9 @@ int qat_uclo_map_obj(struct icp_qat_fw_loader_handle *handle, void *addr_ptr, u32 mem_size, char *obj_name); int qat_uclo_set_cfg_ae_mask(struct icp_qat_fw_loader_handle *handle, unsigned int cfg_ae_mask); +int adf_init_misc_wq(void); +void adf_exit_misc_wq(void); +bool adf_misc_wq_queue_work(struct work_struct *work); #if defined(CONFIG_PCI_IOV) int adf_sriov_configure(struct pci_dev *pdev, int numvfs); void adf_disable_sriov(struct adf_accel_dev *accel_dev); diff --git a/drivers/crypto/qat/qat_common/adf_ctl_drv.c b/drivers/crypto/qat/qat_common/adf_ctl_drv.c index 6f64aa6931461..e8ac932bbaab6 100644 --- a/drivers/crypto/qat/qat_common/adf_ctl_drv.c +++ b/drivers/crypto/qat/qat_common/adf_ctl_drv.c @@ -419,6 +419,9 @@ static int __init adf_register_ctl_device_driver(void) if (adf_chr_drv_create()) goto err_chr_dev; + if (adf_init_misc_wq()) + goto err_misc_wq; + if (adf_init_aer()) goto err_aer; @@ -440,6 +443,8 @@ err_vf_wq: err_pf_wq: adf_exit_aer(); err_aer: + adf_exit_misc_wq(); +err_misc_wq: adf_chr_drv_destroy(); err_chr_dev: mutex_destroy(&adf_ctl_lock); @@ -449,6 +454,7 @@ err_chr_dev: static void __exit adf_unregister_ctl_device_driver(void) { adf_chr_drv_destroy(); + adf_exit_misc_wq(); adf_exit_aer(); adf_exit_vf_wq(); adf_exit_pf_wq(); diff --git a/drivers/crypto/qat/qat_common/adf_isr.c b/drivers/crypto/qat/qat_common/adf_isr.c index 4ca482aa69f7c..803b89ba9670c 
100644 --- a/drivers/crypto/qat/qat_common/adf_isr.c +++ b/drivers/crypto/qat/qat_common/adf_isr.c @@ -16,6 +16,7 @@ #include "adf_transport_internal.h" #define ADF_MAX_NUM_VFS 32 +static struct workqueue_struct *adf_misc_wq; static int adf_enable_msix(struct adf_accel_dev *accel_dev) { @@ -341,3 +342,30 @@ err_out: return ret; } EXPORT_SYMBOL_GPL(adf_isr_resource_alloc); + +/** + * adf_init_misc_wq() - Init misc workqueue + * + * Function init workqueue 'qat_misc_wq' for general purpose. + * + * Return: 0 on success, error code otherwise. + */ +int __init adf_init_misc_wq(void) +{ + adf_misc_wq = alloc_workqueue("qat_misc_wq", WQ_MEM_RECLAIM, 0); + + return !adf_misc_wq ? -ENOMEM : 0; +} + +void adf_exit_misc_wq(void) +{ + if (adf_misc_wq) + destroy_workqueue(adf_misc_wq); + + adf_misc_wq = NULL; +} + +bool adf_misc_wq_queue_work(struct work_struct *work) +{ + return queue_work(adf_misc_wq, work); +} -- GitLab From f734409c77d7e422bc759c53ad234e6af9b56938 Mon Sep 17 00:00:00 2001 From: Wojciech Ziemba Date: Thu, 10 Feb 2022 13:38:26 +0000 Subject: [PATCH 0493/1586] crypto: qat - move and rename GEN4 error register definitions Move error source related CSRs from 4xxx to the wider GEN4 header file. 
Signed-off-by: Wojciech Ziemba Reviewed-by: Giovanni Cabiddu Reviewed-by: Marco Chiappero Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c | 6 +++--- drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h | 14 -------------- drivers/crypto/qat/qat_common/adf_gen4_hw_data.h | 14 ++++++++++++++ 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c index 68d39c833332e..69fb271c85dd6 100644 --- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c +++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c @@ -229,7 +229,7 @@ static void adf_enable_error_correction(struct adf_accel_dev *accel_dev) void __iomem *csr = misc_bar->virt_addr; /* Enable all in errsou3 except VFLR notification on host */ - ADF_CSR_WR(csr, ADF_4XXX_ERRMSK3, ADF_4XXX_VFLNOTIFY); + ADF_CSR_WR(csr, ADF_GEN4_ERRMSK3, ADF_GEN4_VFLNOTIFY); } static void adf_enable_ints(struct adf_accel_dev *accel_dev) @@ -256,9 +256,9 @@ static int adf_init_device(struct adf_accel_dev *accel_dev) addr = (&GET_BARS(accel_dev)[ADF_4XXX_PMISC_BAR])->virt_addr; /* Temporarily mask PM interrupt */ - csr = ADF_CSR_RD(addr, ADF_4XXX_ERRMSK2); + csr = ADF_CSR_RD(addr, ADF_GEN4_ERRMSK2); csr |= ADF_4XXX_PM_SOU; - ADF_CSR_WR(addr, ADF_4XXX_ERRMSK2, csr); + ADF_CSR_WR(addr, ADF_GEN4_ERRMSK2, csr); /* Set DRV_ACTIVE bit to power up the device */ ADF_CSR_WR(addr, ADF_4XXX_PM_INTERRUPT, ADF_4XXX_PM_DRV_ACTIVE); diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h index 12e4fb9b40cef..857b93a3c032d 100644 --- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h +++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h @@ -39,20 +39,6 @@ #define ADF_4XXX_NUM_RINGS_PER_BANK 2 #define ADF_4XXX_NUM_BANKS_PER_VF 4 -/* Error source registers */ -#define ADF_4XXX_ERRSOU0 (0x41A200) -#define ADF_4XXX_ERRSOU1 (0x41A204) -#define ADF_4XXX_ERRSOU2 (0x41A208) -#define ADF_4XXX_ERRSOU3 
(0x41A20C) - -/* Error source mask registers */ -#define ADF_4XXX_ERRMSK0 (0x41A210) -#define ADF_4XXX_ERRMSK1 (0x41A214) -#define ADF_4XXX_ERRMSK2 (0x41A218) -#define ADF_4XXX_ERRMSK3 (0x41A21C) - -#define ADF_4XXX_VFLNOTIFY BIT(7) - /* Arbiter configuration */ #define ADF_4XXX_ARB_CONFIG (BIT(31) | BIT(6) | BIT(0)) #define ADF_4XXX_ARB_OFFSET (0x0) diff --git a/drivers/crypto/qat/qat_common/adf_gen4_hw_data.h b/drivers/crypto/qat/qat_common/adf_gen4_hw_data.h index f0f71ca44ca36..43b8f864806bd 100644 --- a/drivers/crypto/qat/qat_common/adf_gen4_hw_data.h +++ b/drivers/crypto/qat/qat_common/adf_gen4_hw_data.h @@ -122,6 +122,20 @@ do { \ #define ADF_WQM_CSR_RPRESETSTS_STATUS BIT(0) #define ADF_WQM_CSR_RPRESETSTS(bank) (ADF_WQM_CSR_RPRESETCTL(bank) + 4) +/* Error source registers */ +#define ADF_GEN4_ERRSOU0 (0x41A200) +#define ADF_GEN4_ERRSOU1 (0x41A204) +#define ADF_GEN4_ERRSOU2 (0x41A208) +#define ADF_GEN4_ERRSOU3 (0x41A20C) + +/* Error source mask registers */ +#define ADF_GEN4_ERRMSK0 (0x41A210) +#define ADF_GEN4_ERRMSK1 (0x41A214) +#define ADF_GEN4_ERRMSK2 (0x41A218) +#define ADF_GEN4_ERRMSK3 (0x41A21C) + +#define ADF_GEN4_VFLNOTIFY BIT(7) + void adf_gen4_set_ssm_wdtimer(struct adf_accel_dev *accel_dev); void adf_gen4_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops); int adf_gen4_ring_pair_reset(struct adf_accel_dev *accel_dev, u32 bank_number); -- GitLab From e5745f34113b758b45d134dec04a7df94dc67131 Mon Sep 17 00:00:00 2001 From: Wojciech Ziemba Date: Thu, 10 Feb 2022 13:38:27 +0000 Subject: [PATCH 0494/1586] crypto: qat - enable power management for QAT GEN4 Add support for HW QAT Power Management (PM) feature. This feature is enabled at init time (1) by sending an admin message to the firmware, targeting the admin AE, that sets the idle time before the device changes state and (2) by unmasking the PM source of interrupt in ERRMSK2. The interrupt handler is extended to handle a PM interrupt which is triggered by HW when a PM transition occurs. 
In this case, the driver responds acknowledging the transaction using the HOST_MSG mailbox. Signed-off-by: Wojciech Ziemba Co-developed-by: Marcinx Malinowski Signed-off-by: Marcinx Malinowski Reviewed-by: Giovanni Cabiddu Reviewed-by: Marco Chiappero Signed-off-by: Herbert Xu --- .../crypto/qat/qat_4xxx/adf_4xxx_hw_data.c | 15 +- .../crypto/qat/qat_4xxx/adf_4xxx_hw_data.h | 10 -- drivers/crypto/qat/qat_common/Makefile | 1 + .../crypto/qat/qat_common/adf_accel_devices.h | 2 + drivers/crypto/qat/qat_common/adf_admin.c | 37 +++++ .../crypto/qat/qat_common/adf_common_drv.h | 1 + drivers/crypto/qat/qat_common/adf_gen4_pm.c | 137 ++++++++++++++++++ drivers/crypto/qat/qat_common/adf_gen4_pm.h | 44 ++++++ drivers/crypto/qat/qat_common/adf_init.c | 6 + drivers/crypto/qat/qat_common/adf_isr.c | 14 ++ .../qat/qat_common/icp_qat_fw_init_admin.h | 1 + 11 files changed, 252 insertions(+), 16 deletions(-) create mode 100644 drivers/crypto/qat/qat_common/adf_gen4_pm.c create mode 100644 drivers/crypto/qat/qat_common/adf_gen4_pm.h diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c index 69fb271c85dd6..fb5970a684844 100644 --- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c +++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "adf_4xxx_hw_data.h" #include "icp_qat_hw.h" @@ -257,18 +258,18 @@ static int adf_init_device(struct adf_accel_dev *accel_dev) /* Temporarily mask PM interrupt */ csr = ADF_CSR_RD(addr, ADF_GEN4_ERRMSK2); - csr |= ADF_4XXX_PM_SOU; + csr |= ADF_GEN4_PM_SOU; ADF_CSR_WR(addr, ADF_GEN4_ERRMSK2, csr); /* Set DRV_ACTIVE bit to power up the device */ - ADF_CSR_WR(addr, ADF_4XXX_PM_INTERRUPT, ADF_4XXX_PM_DRV_ACTIVE); + ADF_CSR_WR(addr, ADF_GEN4_PM_INTERRUPT, ADF_GEN4_PM_DRV_ACTIVE); /* Poll status register to make sure the device is powered up */ ret = read_poll_timeout(ADF_CSR_RD, status, - status & ADF_4XXX_PM_INIT_STATE, - 
ADF_4XXX_PM_POLL_DELAY_US, - ADF_4XXX_PM_POLL_TIMEOUT_US, true, addr, - ADF_4XXX_PM_STATUS); + status & ADF_GEN4_PM_INIT_STATE, + ADF_GEN4_PM_POLL_DELAY_US, + ADF_GEN4_PM_POLL_TIMEOUT_US, true, addr, + ADF_GEN4_PM_STATUS); if (ret) dev_err(&GET_DEV(accel_dev), "Failed to power up the device\n"); @@ -354,6 +355,8 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data) hw_data->set_ssm_wdtimer = adf_gen4_set_ssm_wdtimer; hw_data->disable_iov = adf_disable_sriov; hw_data->ring_pair_reset = adf_gen4_ring_pair_reset; + hw_data->enable_pm = adf_gen4_enable_pm; + hw_data->handle_pm_interrupt = adf_gen4_handle_pm_interrupt; adf_gen4_init_hw_csr_ops(&hw_data->csr_ops); adf_gen4_init_pf_pfvf_ops(&hw_data->pfvf_ops); diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h index 857b93a3c032d..1034752845ca2 100644 --- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h +++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h @@ -49,16 +49,6 @@ #define ADF_4XXX_ADMINMSGLR_OFFSET (0x500578) #define ADF_4XXX_MAILBOX_BASE_OFFSET (0x600970) -/* Power management */ -#define ADF_4XXX_PM_POLL_DELAY_US 20 -#define ADF_4XXX_PM_POLL_TIMEOUT_US USEC_PER_SEC -#define ADF_4XXX_PM_STATUS (0x50A00C) -#define ADF_4XXX_PM_INTERRUPT (0x50A028) -#define ADF_4XXX_PM_DRV_ACTIVE BIT(20) -#define ADF_4XXX_PM_INIT_STATE BIT(21) -/* Power management source in ERRSOU2 and ERRMSK2 */ -#define ADF_4XXX_PM_SOU BIT(18) - /* Firmware Binaries */ #define ADF_4XXX_FW "qat_4xxx.bin" #define ADF_4XXX_MMP "qat_4xxx_mmp.bin" diff --git a/drivers/crypto/qat/qat_common/Makefile b/drivers/crypto/qat/qat_common/Makefile index 7e191a42a5c7e..f25a6c8edfc73 100644 --- a/drivers/crypto/qat/qat_common/Makefile +++ b/drivers/crypto/qat/qat_common/Makefile @@ -12,6 +12,7 @@ intel_qat-objs := adf_cfg.o \ adf_hw_arbiter.o \ adf_gen2_hw_data.o \ adf_gen4_hw_data.o \ + adf_gen4_pm.o \ qat_crypto.o \ qat_algs.o \ qat_asym_algs.o \ diff --git 
a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h index 2d4cd7c7cf33b..a03c6cf723312 100644 --- a/drivers/crypto/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h @@ -184,6 +184,8 @@ struct adf_hw_device_data { void (*exit_arb)(struct adf_accel_dev *accel_dev); const u32 *(*get_arb_mapping)(void); int (*init_device)(struct adf_accel_dev *accel_dev); + int (*enable_pm)(struct adf_accel_dev *accel_dev); + bool (*handle_pm_interrupt)(struct adf_accel_dev *accel_dev); void (*disable_iov)(struct adf_accel_dev *accel_dev); void (*configure_iov_threads)(struct adf_accel_dev *accel_dev, bool enable); diff --git a/drivers/crypto/qat/qat_common/adf_admin.c b/drivers/crypto/qat/qat_common/adf_admin.c index 498eb6f690e37..3b6184c350811 100644 --- a/drivers/crypto/qat/qat_common/adf_admin.c +++ b/drivers/crypto/qat/qat_common/adf_admin.c @@ -251,6 +251,43 @@ int adf_send_admin_init(struct adf_accel_dev *accel_dev) } EXPORT_SYMBOL_GPL(adf_send_admin_init); +/** + * adf_init_admin_pm() - Function sends PM init message to FW + * @accel_dev: Pointer to acceleration device. + * @idle_delay: QAT HW idle time before power gating is initiated. + * 000 - 64us + * 001 - 128us + * 010 - 256us + * 011 - 512us + * 100 - 1ms + * 101 - 2ms + * 110 - 4ms + * 111 - 8ms + * + * Function sends to the FW the admin init message for the PM state + * configuration. + * + * Return: 0 on success, error code otherwise. 
+ */ +int adf_init_admin_pm(struct adf_accel_dev *accel_dev, u32 idle_delay) +{ + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + struct icp_qat_fw_init_admin_resp resp = {0}; + struct icp_qat_fw_init_admin_req req = {0}; + u32 ae_mask = hw_data->admin_ae_mask; + + if (!accel_dev->admin) { + dev_err(&GET_DEV(accel_dev), "adf_admin is not available\n"); + return -EFAULT; + } + + req.cmd_id = ICP_QAT_FW_PM_STATE_CONFIG; + req.idle_filter = idle_delay; + + return adf_send_admin(accel_dev, &req, &resp, ae_mask); +} +EXPORT_SYMBOL_GPL(adf_init_admin_pm); + int adf_init_admin_comms(struct adf_accel_dev *accel_dev) { struct adf_admin_comms *admin; diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h index 0775491768797..e8c9b77c0d66b 100644 --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/qat/qat_common/adf_common_drv.h @@ -102,6 +102,7 @@ void adf_exit_aer(void); int adf_init_admin_comms(struct adf_accel_dev *accel_dev); void adf_exit_admin_comms(struct adf_accel_dev *accel_dev); int adf_send_admin_init(struct adf_accel_dev *accel_dev); +int adf_init_admin_pm(struct adf_accel_dev *accel_dev, u32 idle_delay); int adf_init_arb(struct adf_accel_dev *accel_dev); void adf_exit_arb(struct adf_accel_dev *accel_dev); void adf_update_ring_arb(struct adf_etr_ring_data *ring); diff --git a/drivers/crypto/qat/qat_common/adf_gen4_pm.c b/drivers/crypto/qat/qat_common/adf_gen4_pm.c new file mode 100644 index 0000000000000..7037c0892a8a2 --- /dev/null +++ b/drivers/crypto/qat/qat_common/adf_gen4_pm.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) +/* Copyright(c) 2022 Intel Corporation */ +#include +#include +#include "adf_accel_devices.h" +#include "adf_common_drv.h" +#include "adf_gen4_pm.h" +#include "adf_cfg_strings.h" +#include "icp_qat_fw_init_admin.h" +#include "adf_gen4_hw_data.h" +#include "adf_cfg.h" + +enum qat_pm_host_msg { + PM_NO_CHANGE = 0, + 
PM_SET_MIN, +}; + +struct adf_gen4_pm_data { + struct work_struct pm_irq_work; + struct adf_accel_dev *accel_dev; + u32 pm_int_sts; +}; + +static int send_host_msg(struct adf_accel_dev *accel_dev) +{ + void __iomem *pmisc = adf_get_pmisc_base(accel_dev); + u32 msg; + + msg = ADF_CSR_RD(pmisc, ADF_GEN4_PM_HOST_MSG); + if (msg & ADF_GEN4_PM_MSG_PENDING) + return -EBUSY; + + /* Send HOST_MSG */ + msg = FIELD_PREP(ADF_GEN4_PM_MSG_PAYLOAD_BIT_MASK, PM_SET_MIN); + msg |= ADF_GEN4_PM_MSG_PENDING; + ADF_CSR_WR(pmisc, ADF_GEN4_PM_HOST_MSG, msg); + + /* Poll status register to make sure the HOST_MSG has been processed */ + return read_poll_timeout(ADF_CSR_RD, msg, + !(msg & ADF_GEN4_PM_MSG_PENDING), + ADF_GEN4_PM_MSG_POLL_DELAY_US, + ADF_GEN4_PM_POLL_TIMEOUT_US, true, pmisc, + ADF_GEN4_PM_HOST_MSG); +} + +static void pm_bh_handler(struct work_struct *work) +{ + struct adf_gen4_pm_data *pm_data = + container_of(work, struct adf_gen4_pm_data, pm_irq_work); + struct adf_accel_dev *accel_dev = pm_data->accel_dev; + void __iomem *pmisc = adf_get_pmisc_base(accel_dev); + u32 pm_int_sts = pm_data->pm_int_sts; + u32 val; + + /* PM Idle interrupt */ + if (pm_int_sts & ADF_GEN4_PM_IDLE_STS) { + /* Issue host message to FW */ + if (send_host_msg(accel_dev)) + dev_warn_ratelimited(&GET_DEV(accel_dev), + "Failed to send host msg to FW\n"); + } + + /* Clear interrupt status */ + ADF_CSR_WR(pmisc, ADF_GEN4_PM_INTERRUPT, pm_int_sts); + + /* Reenable PM interrupt */ + val = ADF_CSR_RD(pmisc, ADF_GEN4_ERRMSK2); + val &= ~ADF_GEN4_PM_SOU; + ADF_CSR_WR(pmisc, ADF_GEN4_ERRMSK2, val); + + kfree(pm_data); +} + +bool adf_gen4_handle_pm_interrupt(struct adf_accel_dev *accel_dev) +{ + void __iomem *pmisc = adf_get_pmisc_base(accel_dev); + struct adf_gen4_pm_data *pm_data = NULL; + u32 errsou2; + u32 errmsk2; + u32 val; + + /* Only handle the interrupt triggered by PM */ + errmsk2 = ADF_CSR_RD(pmisc, ADF_GEN4_ERRMSK2); + if (errmsk2 & ADF_GEN4_PM_SOU) + return false; + + errsou2 = ADF_CSR_RD(pmisc, 
ADF_GEN4_ERRSOU2); + if (!(errsou2 & ADF_GEN4_PM_SOU)) + return false; + + /* Disable interrupt */ + val = ADF_CSR_RD(pmisc, ADF_GEN4_ERRMSK2); + val |= ADF_GEN4_PM_SOU; + ADF_CSR_WR(pmisc, ADF_GEN4_ERRMSK2, val); + + val = ADF_CSR_RD(pmisc, ADF_GEN4_PM_INTERRUPT); + + pm_data = kzalloc(sizeof(*pm_data), GFP_ATOMIC); + if (!pm_data) + return false; + + pm_data->pm_int_sts = val; + pm_data->accel_dev = accel_dev; + + INIT_WORK(&pm_data->pm_irq_work, pm_bh_handler); + adf_misc_wq_queue_work(&pm_data->pm_irq_work); + + return true; +} +EXPORT_SYMBOL_GPL(adf_gen4_handle_pm_interrupt); + +int adf_gen4_enable_pm(struct adf_accel_dev *accel_dev) +{ + void __iomem *pmisc = adf_get_pmisc_base(accel_dev); + int ret; + u32 val; + + ret = adf_init_admin_pm(accel_dev, ADF_GEN4_PM_DEFAULT_IDLE_FILTER); + if (ret) + return ret; + + /* Enable default PM interrupts: IDLE, THROTTLE */ + val = ADF_CSR_RD(pmisc, ADF_GEN4_PM_INTERRUPT); + val |= ADF_GEN4_PM_INT_EN_DEFAULT; + + /* Clear interrupt status */ + val |= ADF_GEN4_PM_INT_STS_MASK; + ADF_CSR_WR(pmisc, ADF_GEN4_PM_INTERRUPT, val); + + /* Unmask PM Interrupt */ + val = ADF_CSR_RD(pmisc, ADF_GEN4_ERRMSK2); + val &= ~ADF_GEN4_PM_SOU; + ADF_CSR_WR(pmisc, ADF_GEN4_ERRMSK2, val); + + return 0; +} +EXPORT_SYMBOL_GPL(adf_gen4_enable_pm); diff --git a/drivers/crypto/qat/qat_common/adf_gen4_pm.h b/drivers/crypto/qat/qat_common/adf_gen4_pm.h new file mode 100644 index 0000000000000..f8f8a9ee29e5b --- /dev/null +++ b/drivers/crypto/qat/qat_common/adf_gen4_pm.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */ +/* Copyright(c) 2022 Intel Corporation */ +#ifndef ADF_GEN4_PM_H +#define ADF_GEN4_PM_H + +#include "adf_accel_devices.h" + +/* Power management registers */ +#define ADF_GEN4_PM_HOST_MSG (0x50A01C) + +/* Power management */ +#define ADF_GEN4_PM_POLL_DELAY_US 20 +#define ADF_GEN4_PM_POLL_TIMEOUT_US USEC_PER_SEC +#define ADF_GEN4_PM_MSG_POLL_DELAY_US (10 * USEC_PER_MSEC) +#define ADF_GEN4_PM_STATUS 
(0x50A00C) +#define ADF_GEN4_PM_INTERRUPT (0x50A028) + +/* Power management source in ERRSOU2 and ERRMSK2 */ +#define ADF_GEN4_PM_SOU BIT(18) + +#define ADF_GEN4_PM_IDLE_INT_EN BIT(18) +#define ADF_GEN4_PM_THROTTLE_INT_EN BIT(19) +#define ADF_GEN4_PM_DRV_ACTIVE BIT(20) +#define ADF_GEN4_PM_INIT_STATE BIT(21) +#define ADF_GEN4_PM_INT_EN_DEFAULT (ADF_GEN4_PM_IDLE_INT_EN | \ + ADF_GEN4_PM_THROTTLE_INT_EN) + +#define ADF_GEN4_PM_THR_STS BIT(0) +#define ADF_GEN4_PM_IDLE_STS BIT(1) +#define ADF_GEN4_PM_FW_INT_STS BIT(2) +#define ADF_GEN4_PM_INT_STS_MASK (ADF_GEN4_PM_THR_STS | \ + ADF_GEN4_PM_IDLE_STS | \ + ADF_GEN4_PM_FW_INT_STS) + +#define ADF_GEN4_PM_MSG_PENDING BIT(0) +#define ADF_GEN4_PM_MSG_PAYLOAD_BIT_MASK GENMASK(28, 1) + +#define ADF_GEN4_PM_DEFAULT_IDLE_FILTER (0x0) +#define ADF_GEN4_PM_MAX_IDLE_FILTER (0x7) + +int adf_gen4_enable_pm(struct adf_accel_dev *accel_dev); +bool adf_gen4_handle_pm_interrupt(struct adf_accel_dev *accel_dev); + +#endif diff --git a/drivers/crypto/qat/qat_common/adf_init.c b/drivers/crypto/qat/qat_common/adf_init.c index 2edc63c6b6caa..c2c718f1b4895 100644 --- a/drivers/crypto/qat/qat_common/adf_init.c +++ b/drivers/crypto/qat/qat_common/adf_init.c @@ -181,6 +181,12 @@ int adf_dev_start(struct adf_accel_dev *accel_dev) if (hw_data->set_ssm_wdtimer) hw_data->set_ssm_wdtimer(accel_dev); + /* Enable Power Management */ + if (hw_data->enable_pm && hw_data->enable_pm(accel_dev)) { + dev_err(&GET_DEV(accel_dev), "Failed to configure Power Management\n"); + return -EFAULT; + } + list_for_each(list_itr, &service_table) { service = list_entry(list_itr, struct service_hndl, list); if (service->event_hld(accel_dev, ADF_EVENT_START)) { diff --git a/drivers/crypto/qat/qat_common/adf_isr.c b/drivers/crypto/qat/qat_common/adf_isr.c index 803b89ba9670c..a35149f8bf1ee 100644 --- a/drivers/crypto/qat/qat_common/adf_isr.c +++ b/drivers/crypto/qat/qat_common/adf_isr.c @@ -124,6 +124,17 @@ static bool adf_handle_vf2pf_int(struct adf_accel_dev *accel_dev) } 
#endif /* CONFIG_PCI_IOV */ +static bool adf_handle_pm_int(struct adf_accel_dev *accel_dev) +{ + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + + if (hw_data->handle_pm_interrupt && + hw_data->handle_pm_interrupt(accel_dev)) + return true; + + return false; +} + static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr) { struct adf_accel_dev *accel_dev = dev_ptr; @@ -134,6 +145,9 @@ static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr) return IRQ_HANDLED; #endif /* CONFIG_PCI_IOV */ + if (adf_handle_pm_int(accel_dev)) + return IRQ_HANDLED; + dev_dbg(&GET_DEV(accel_dev), "qat_dev%d spurious AE interrupt\n", accel_dev->accel_id); diff --git a/drivers/crypto/qat/qat_common/icp_qat_fw_init_admin.h b/drivers/crypto/qat/qat_common/icp_qat_fw_init_admin.h index afe59a7684ac5..56cb827f93ea3 100644 --- a/drivers/crypto/qat/qat_common/icp_qat_fw_init_admin.h +++ b/drivers/crypto/qat/qat_common/icp_qat_fw_init_admin.h @@ -16,6 +16,7 @@ enum icp_qat_fw_init_admin_cmd_id { ICP_QAT_FW_HEARTBEAT_SYNC = 7, ICP_QAT_FW_HEARTBEAT_GET = 8, ICP_QAT_FW_COMP_CAPABILITY_GET = 9, + ICP_QAT_FW_PM_STATE_CONFIG = 128, }; enum icp_qat_fw_init_admin_resp_status { -- GitLab From 882f6c602b65cd384bec2cea4fbfc091a7bbfc50 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Thu, 10 Feb 2022 21:28:03 +0100 Subject: [PATCH 0495/1586] crypto: omap-aes - Constify static attribute_group The only usage of omap_aes_attr_group is to pass its address to sysfs_{create,remove}_group(), which takes pointers to const struct attribute_group. Make it const to allow the compiler to put it in read-only memory. 
Signed-off-by: Rikard Falkeborn Signed-off-by: Herbert Xu --- drivers/crypto/omap-aes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index a196bb8b17010..581211a926283 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c @@ -1093,7 +1093,7 @@ static struct attribute *omap_aes_attrs[] = { NULL, }; -static struct attribute_group omap_aes_attr_group = { +static const struct attribute_group omap_aes_attr_group = { .attrs = omap_aes_attrs, }; -- GitLab From 83b5a23b6604028ef635a72465b23a85a425b695 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Thu, 10 Feb 2022 21:28:04 +0100 Subject: [PATCH 0496/1586] crypto: omap-sham - Constify static attribute_group The only usage of omap_sham_attr_group is to pass its address to sysfs_{create,remove}_group(), which takes pointers to const struct attribute_group. Make it const to allow the compiler to put it in read-only memory. Signed-off-by: Rikard Falkeborn Signed-off-by: Herbert Xu --- drivers/crypto/omap-sham.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index f6bf53c00b614..4b37dc69a50ce 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -2045,7 +2045,7 @@ static struct attribute *omap_sham_attrs[] = { NULL, }; -static struct attribute_group omap_sham_attr_group = { +static const struct attribute_group omap_sham_attr_group = { .attrs = omap_sham_attrs, }; -- GitLab From bd75b4ef4977f567c7a567cf8f48dc122a097aa9 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Thu, 10 Feb 2022 21:28:05 +0100 Subject: [PATCH 0497/1586] crypto: nx - Constify static attribute_group structs The only usage of these is to pass their address to sysfs_{create,remove}_group(), which takes pointers to const struct attribute_group. Make them const to allow the compiler to put them in read-only memory. 
Signed-off-by: Rikard Falkeborn Reviewed-by: Daniel Axtens Signed-off-by: Herbert Xu --- drivers/crypto/nx/nx-common-pseries.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/nx/nx-common-pseries.c b/drivers/crypto/nx/nx-common-pseries.c index 4e304f6081e47..7584a34ba88c2 100644 --- a/drivers/crypto/nx/nx-common-pseries.c +++ b/drivers/crypto/nx/nx-common-pseries.c @@ -962,7 +962,7 @@ static struct attribute *nx842_sysfs_entries[] = { NULL, }; -static struct attribute_group nx842_attribute_group = { +static const struct attribute_group nx842_attribute_group = { .name = NULL, /* put in device directory */ .attrs = nx842_sysfs_entries, }; @@ -992,7 +992,7 @@ static struct attribute *nxcop_caps_sysfs_entries[] = { NULL, }; -static struct attribute_group nxcop_caps_attr_group = { +static const struct attribute_group nxcop_caps_attr_group = { .name = "nx_gzip_caps", .attrs = nxcop_caps_sysfs_entries, }; -- GitLab From 142be74078a2cf24f9694093e21216b2d8740d17 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 10 Feb 2022 21:42:18 +0100 Subject: [PATCH 0498/1586] crypto: ux500 - use GFP_KERNEL Platform_driver probe functions aren't called with locks held and thus don't need GFP_ATOMIC. Use GFP_KERNEL instead. Problem found with Coccinelle. 
Signed-off-by: Julia Lawall Signed-off-by: Herbert Xu --- drivers/crypto/ux500/cryp/cryp_core.c | 2 +- drivers/crypto/ux500/hash/hash_core.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/ux500/cryp/cryp_core.c b/drivers/crypto/ux500/cryp/cryp_core.c index 97277b7150cb4..5a57c9afd8c88 100644 --- a/drivers/crypto/ux500/cryp/cryp_core.c +++ b/drivers/crypto/ux500/cryp/cryp_core.c @@ -1264,7 +1264,7 @@ static int ux500_cryp_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; dev_dbg(dev, "[%s]", __func__); - device_data = devm_kzalloc(dev, sizeof(*device_data), GFP_ATOMIC); + device_data = devm_kzalloc(dev, sizeof(*device_data), GFP_KERNEL); if (!device_data) { ret = -ENOMEM; goto out; diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c index 51a6e1a424349..5157c118d642c 100644 --- a/drivers/crypto/ux500/hash/hash_core.c +++ b/drivers/crypto/ux500/hash/hash_core.c @@ -1658,7 +1658,7 @@ static int ux500_hash_probe(struct platform_device *pdev) struct hash_device_data *device_data; struct device *dev = &pdev->dev; - device_data = devm_kzalloc(dev, sizeof(*device_data), GFP_ATOMIC); + device_data = devm_kzalloc(dev, sizeof(*device_data), GFP_KERNEL); if (!device_data) { ret = -ENOMEM; goto out; -- GitLab From aec01cc8d119b453b26da9ba45ec60ac2b395e18 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Fri, 11 Feb 2022 17:07:58 +0800 Subject: [PATCH 0499/1586] crypto: hisilicon/sec - add the register configuration for HW V3 Added the register configuration of the SVA mode for HW V3. 
Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec_main.c | 51 +++++++++++++++++++----- 1 file changed, 40 insertions(+), 11 deletions(-) diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 26d3ab1d308ba..45d2b27da9ad3 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -90,6 +90,10 @@ SEC_USER1_WB_DATA_SSV) #define SEC_USER1_SMMU_SVA (SEC_USER1_SMMU_NORMAL | SEC_USER1_SVA_SET) #define SEC_USER1_SMMU_MASK (~SEC_USER1_SVA_SET) +#define SEC_INTERFACE_USER_CTRL0_REG_V3 0x302220 +#define SEC_INTERFACE_USER_CTRL1_REG_V3 0x302224 +#define SEC_USER1_SMMU_NORMAL_V3 (BIT(23) | BIT(17) | BIT(11) | BIT(5)) +#define SEC_USER1_SMMU_MASK_V3 0xFF79E79E #define SEC_CORE_INT_STATUS_M_ECC BIT(2) #define SEC_PREFETCH_CFG 0x301130 @@ -335,6 +339,41 @@ static void sec_set_endian(struct hisi_qm *qm) writel_relaxed(reg, qm->io_base + SEC_CONTROL_REG); } +static void sec_engine_sva_config(struct hisi_qm *qm) +{ + u32 reg; + + if (qm->ver > QM_HW_V2) { + reg = readl_relaxed(qm->io_base + + SEC_INTERFACE_USER_CTRL0_REG_V3); + reg |= SEC_USER0_SMMU_NORMAL; + writel_relaxed(reg, qm->io_base + + SEC_INTERFACE_USER_CTRL0_REG_V3); + + reg = readl_relaxed(qm->io_base + + SEC_INTERFACE_USER_CTRL1_REG_V3); + reg &= SEC_USER1_SMMU_MASK_V3; + reg |= SEC_USER1_SMMU_NORMAL_V3; + writel_relaxed(reg, qm->io_base + + SEC_INTERFACE_USER_CTRL1_REG_V3); + } else { + reg = readl_relaxed(qm->io_base + + SEC_INTERFACE_USER_CTRL0_REG); + reg |= SEC_USER0_SMMU_NORMAL; + writel_relaxed(reg, qm->io_base + + SEC_INTERFACE_USER_CTRL0_REG); + reg = readl_relaxed(qm->io_base + + SEC_INTERFACE_USER_CTRL1_REG); + reg &= SEC_USER1_SMMU_MASK; + if (qm->use_sva) + reg |= SEC_USER1_SMMU_SVA; + else + reg |= SEC_USER1_SMMU_NORMAL; + writel_relaxed(reg, qm->io_base + + SEC_INTERFACE_USER_CTRL1_REG); + } +} + static void sec_open_sva_prefetch(struct hisi_qm *qm) { u32 val; @@ -426,17 +465,7 
@@ static int sec_engine_init(struct hisi_qm *qm) reg |= (0x1 << SEC_TRNG_EN_SHIFT); writel_relaxed(reg, qm->io_base + SEC_CONTROL_REG); - reg = readl_relaxed(qm->io_base + SEC_INTERFACE_USER_CTRL0_REG); - reg |= SEC_USER0_SMMU_NORMAL; - writel_relaxed(reg, qm->io_base + SEC_INTERFACE_USER_CTRL0_REG); - - reg = readl_relaxed(qm->io_base + SEC_INTERFACE_USER_CTRL1_REG); - reg &= SEC_USER1_SMMU_MASK; - if (qm->use_sva && qm->ver == QM_HW_V2) - reg |= SEC_USER1_SMMU_SVA; - else - reg |= SEC_USER1_SMMU_NORMAL; - writel_relaxed(reg, qm->io_base + SEC_INTERFACE_USER_CTRL1_REG); + sec_engine_sva_config(qm); writel(SEC_SINGLE_PORT_MAX_TRANS, qm->io_base + AM_CFG_SINGLE_PORT_MAX_TRANS); -- GitLab From f8a2652826444d13181061840b96a5d975d5b6c6 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Fri, 11 Feb 2022 17:08:18 +0800 Subject: [PATCH 0500/1586] crypto: hisilicon/sec - not need to enable sm4 extra mode at HW V3 There is no need to enable sm4 extra mode on HW V3, so only enable it on earlier hardware versions. Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec_main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 45d2b27da9ad3..0b9906ff69e3c 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -472,9 +472,11 @@ static int sec_engine_init(struct hisi_qm *qm) writel(SEC_SAA_ENABLE, qm->io_base + SEC_SAA_EN_REG); - /* Enable sm4 extra mode, as ctr/ecb */ - writel_relaxed(SEC_BD_ERR_CHK_EN0, - qm->io_base + SEC_BD_ERR_CHK_EN_REG0); + /* HW V2 enable sm4 extra mode, as ctr/ecb */ + if (qm->ver < QM_HW_V3) + writel_relaxed(SEC_BD_ERR_CHK_EN0, + qm->io_base + SEC_BD_ERR_CHK_EN_REG0); + /* Enable sm4 xts mode multiple iv */ writel_relaxed(SEC_BD_ERR_CHK_EN1, qm->io_base + SEC_BD_ERR_CHK_EN_REG1); -- GitLab From 973d74e93820d99d8ea203882631c76edab699c9 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Fri, 11 Feb
2022 12:16:17 +0000 Subject: [PATCH 0501/1586] crypto: rockchip - ECB does not need IV When loading the rockchip crypto module, testmgr complains that the ivsize of ecb-des3-ede-rk is not the same as that of the generic implementation. In fact ECB does not use an IV. Fixes: ce0183cb6464b ("crypto: rockchip - switch to skcipher API") Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/rockchip/rk3288_crypto_skcipher.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c index 1cece1a7d3f00..5bbf0d2722e11 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c +++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c @@ -506,7 +506,6 @@ struct rk_crypto_tmp rk_ecb_des3_ede_alg = { .exit = rk_ablk_exit_tfm, .min_keysize = DES3_EDE_KEY_SIZE, .max_keysize = DES3_EDE_KEY_SIZE, - .ivsize = DES_BLOCK_SIZE, .setkey = rk_tdes_setkey, .encrypt = rk_des3_ede_ecb_encrypt, .decrypt = rk_des3_ede_ecb_decrypt, -- GitLab From c90e453916bd4d74297303d000f011cdb47a7d94 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 13 Feb 2022 21:46:28 +0100 Subject: [PATCH 0502/1586] hwrng: core - do not bother to order list of devices by quality There is no real reason why this list needs to be kept ordered by the driver-provided quality value -- a value which is set only by a handful of hw_random devices anyway. Cc: Herbert Xu Cc: Jason A.
Donenfeld Signed-off-by: Dominik Brodowski Signed-off-by: Herbert Xu --- drivers/char/hw_random/core.c | 36 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index f327f7493585e..6f09f4e5af20f 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -31,7 +31,7 @@ static struct hwrng *current_rng; /* the current rng has been explicitly chosen by user via sysfs */ static int cur_rng_set_by_user; static struct task_struct *hwrng_fill; -/* list of registered rngs, sorted decending by quality */ +/* list of registered rngs */ static LIST_HEAD(rng_list); /* Protects rng_list and current_rng */ static DEFINE_MUTEX(rng_mutex); @@ -297,24 +297,28 @@ static struct miscdevice rng_miscdev = { static int enable_best_rng(void) { + struct hwrng *rng, *new_rng = NULL; int ret = -ENODEV; BUG_ON(!mutex_is_locked(&rng_mutex)); - /* rng_list is sorted by quality, use the best (=first) one */ - if (!list_empty(&rng_list)) { - struct hwrng *new_rng; - - new_rng = list_entry(rng_list.next, struct hwrng, list); - ret = ((new_rng == current_rng) ? 0 : set_current_rng(new_rng)); - if (!ret) - cur_rng_set_by_user = 0; - } else { + /* no rng to use? */ + if (list_empty(&rng_list)) { drop_current_rng(); cur_rng_set_by_user = 0; - ret = 0; + return 0; } + /* use the rng which offers the best quality */ + list_for_each_entry(rng, &rng_list, list) { + if (!new_rng || rng->quality > new_rng->quality) + new_rng = rng; + } + + ret = ((new_rng == current_rng) ? 
0 : set_current_rng(new_rng)); + if (!ret) + cur_rng_set_by_user = 0; + return ret; } @@ -475,7 +479,6 @@ int hwrng_register(struct hwrng *rng) { int err = -EINVAL; struct hwrng *tmp; - struct list_head *rng_list_ptr; bool is_new_current = false; if (!rng->name || (!rng->data_read && !rng->read)) @@ -489,18 +492,11 @@ int hwrng_register(struct hwrng *rng) if (strcmp(tmp->name, rng->name) == 0) goto out_unlock; } + list_add_tail(&rng->list, &rng_list); init_completion(&rng->cleanup_done); complete(&rng->cleanup_done); - /* rng_list is sorted by decreasing quality */ - list_for_each(rng_list_ptr, &rng_list) { - tmp = list_entry(rng_list_ptr, struct hwrng, list); - if (tmp->quality < rng->quality) - break; - } - list_add_tail(&rng->list, rng_list_ptr); - if (!current_rng || (!cur_rng_set_by_user && rng->quality > current_rng->quality)) { /* -- GitLab From 077bb7a1baec75bd2d7d08e2cbdeb5b72344a4ad Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 13 Feb 2022 21:46:29 +0100 Subject: [PATCH 0503/1586] hwrng: core - start and stop in-kernel rngd in separate function Extract the start/stop logic for the in-kernel rngd thread to a separate function. Cc: Herbert Xu Cc: Jason A. 
Donenfeld Signed-off-by: Dominik Brodowski Signed-off-by: Herbert Xu --- drivers/char/hw_random/core.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 6f09f4e5af20f..29febf55b0d46 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -51,7 +51,7 @@ MODULE_PARM_DESC(default_quality, static void drop_current_rng(void); static int hwrng_init(struct hwrng *rng); -static void start_khwrngd(void); +static void hwrng_manage_rngd(void); static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size, int wait); @@ -164,10 +164,7 @@ skip_init: if (current_quality > 1024) current_quality = 1024; - if (current_quality == 0 && hwrng_fill) - kthread_stop(hwrng_fill); - if (current_quality > 0 && !hwrng_fill) - start_khwrngd(); + hwrng_manage_rngd(); return 0; } @@ -466,12 +463,19 @@ static int hwrng_fillfn(void *unused) return 0; } -static void start_khwrngd(void) +static void hwrng_manage_rngd(void) { - hwrng_fill = kthread_run(hwrng_fillfn, NULL, "hwrng"); - if (IS_ERR(hwrng_fill)) { - pr_err("hwrng_fill thread creation failed\n"); - hwrng_fill = NULL; + if (WARN_ON(!mutex_is_locked(&rng_mutex))) + return; + + if (current_quality == 0 && hwrng_fill) + kthread_stop(hwrng_fill); + if (current_quality > 0 && !hwrng_fill) { + hwrng_fill = kthread_run(hwrng_fillfn, NULL, "hwrng"); + if (IS_ERR(hwrng_fill)) { + pr_err("hwrng_fill thread creation failed\n"); + hwrng_fill = NULL; + } } } -- GitLab From f0fb6953b39e015acfecb8b5158642614e6ce364 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 13 Feb 2022 21:46:30 +0100 Subject: [PATCH 0504/1586] hwrng: core - use per-rng quality value instead of global setting The current_quality variable exposed as a module parameter is fundamentally broken: If it is set at boot time, it is overwritten once the first hw rng device is loaded; if it is set at runtime, it is without effect if 
the hw rng device had its quality value set to 0 (and no default_quality was set); and if a new rng is selected, it gets overwritten. Therefore, mark it as obsolete, and replace it by the per-rng quality setting. Cc: Herbert Xu Cc: Jason A. Donenfeld Signed-off-by: Dominik Brodowski Signed-off-by: Herbert Xu --- drivers/char/hw_random/core.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 29febf55b0d46..8df102b39b35d 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -44,14 +44,14 @@ static unsigned short default_quality; /* = 0; default to "off" */ module_param(current_quality, ushort, 0644); MODULE_PARM_DESC(current_quality, - "current hwrng entropy estimation per 1024 bits of input"); + "current hwrng entropy estimation per 1024 bits of input -- obsolete"); module_param(default_quality, ushort, 0644); MODULE_PARM_DESC(default_quality, "default entropy content of hwrng per 1024 bits of input"); static void drop_current_rng(void); static int hwrng_init(struct hwrng *rng); -static void hwrng_manage_rngd(void); +static void hwrng_manage_rngd(struct hwrng *rng); static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size, int wait); @@ -160,11 +160,13 @@ static int hwrng_init(struct hwrng *rng) reinit_completion(&rng->cleanup_done); skip_init: - current_quality = rng->quality ? 
: default_quality; - if (current_quality > 1024) - current_quality = 1024; + if (!rng->quality) + rng->quality = default_quality; + if (rng->quality > 1024) + rng->quality = 1024; + current_quality = rng->quality; /* obsolete */ - hwrng_manage_rngd(); + hwrng_manage_rngd(rng); return 0; } @@ -429,19 +431,24 @@ static int hwrng_fillfn(void *unused) long rc; while (!kthread_should_stop()) { + unsigned short quality; struct hwrng *rng; - if (!current_quality) - break; - rng = get_current_rng(); if (IS_ERR(rng) || !rng) break; mutex_lock(&reading_mutex); rc = rng_get_data(rng, rng_fillbuf, rng_buffer_size(), 1); + if (current_quality != rng->quality) + rng->quality = current_quality; /* obsolete */ + quality = rng->quality; mutex_unlock(&reading_mutex); put_rng(rng); + + if (!quality) + break; + if (rc <= 0) { pr_warn("hwrng: no data available\n"); msleep_interruptible(10000); @@ -451,7 +458,7 @@ static int hwrng_fillfn(void *unused) /* If we cannot credit at least one bit of entropy, * keep track of the remainder for the next iteration */ - entropy = rc * current_quality * 8 + entropy_credit; + entropy = rc * quality * 8 + entropy_credit; if ((entropy >> 10) == 0) entropy_credit = entropy; @@ -463,14 +470,14 @@ static int hwrng_fillfn(void *unused) return 0; } -static void hwrng_manage_rngd(void) +static void hwrng_manage_rngd(struct hwrng *rng) { if (WARN_ON(!mutex_is_locked(&rng_mutex))) return; - if (current_quality == 0 && hwrng_fill) + if (rng->quality == 0 && hwrng_fill) kthread_stop(hwrng_fill); - if (current_quality > 0 && !hwrng_fill) { + if (rng->quality > 0 && !hwrng_fill) { hwrng_fill = kthread_run(hwrng_fillfn, NULL, "hwrng"); if (IS_ERR(hwrng_fill)) { pr_err("hwrng_fill thread creation failed\n"); -- GitLab From 8208285632f950d2bfd489b10148e05134b7119e Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 13 Feb 2022 21:46:31 +0100 Subject: [PATCH 0505/1586] hwrng: core - introduce rng_quality sysfs attribute The rng_quality sysfs attribute returns 
the quality setting for the currently active hw_random device, in entropy bits per 1024 bits of input. Storing a value between 0 and 1024 to this file updates this estimate accordingly. Based on the updates to the quality setting, the rngd kernel thread may be stopped (if no hw_random device is trusted to return entropy), may be started (if the quality setting is increased from zero), or may use a different hw_random source (if that has higher quality output). Cc: Herbert Xu Cc: Jason A. Donenfeld Signed-off-by: Dominik Brodowski Signed-off-by: Herbert Xu --- drivers/char/hw_random/core.c | 64 ++++++++++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 1 deletion(-) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 8df102b39b35d..71eafcc451e12 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -44,7 +44,7 @@ static unsigned short default_quality; /* = 0; default to "off" */ module_param(current_quality, ushort, 0644); MODULE_PARM_DESC(current_quality, - "current hwrng entropy estimation per 1024 bits of input -- obsolete"); + "current hwrng entropy estimation per 1024 bits of input -- obsolete, use rng_quality instead"); module_param(default_quality, ushort, 0644); MODULE_PARM_DESC(default_quality, "default entropy content of hwrng per 1024 bits of input"); @@ -402,14 +402,76 @@ static ssize_t rng_selected_show(struct device *dev, return sysfs_emit(buf, "%d\n", cur_rng_set_by_user); } +static ssize_t rng_quality_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + ssize_t ret; + struct hwrng *rng; + + rng = get_current_rng(); + if (IS_ERR(rng)) + return PTR_ERR(rng); + + if (!rng) /* no need to put_rng */ + return -ENODEV; + + ret = sysfs_emit(buf, "%hu\n", rng->quality); + put_rng(rng); + + return ret; +} + +static ssize_t rng_quality_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + u16 quality; + int ret = -EINVAL; + + if 
(len < 2) + return -EINVAL; + + ret = mutex_lock_interruptible(&rng_mutex); + if (ret) + return -ERESTARTSYS; + + ret = kstrtou16(buf, 0, &quality); + if (ret || quality > 1024) { + ret = -EINVAL; + goto out; + } + + if (!current_rng) { + ret = -ENODEV; + goto out; + } + + current_rng->quality = quality; + current_quality = quality; /* obsolete */ + + /* the best available RNG may have changed */ + ret = enable_best_rng(); + + /* start/stop rngd if necessary */ + if (current_rng) + hwrng_manage_rngd(current_rng); + +out: + mutex_unlock(&rng_mutex); + return ret ? ret : len; +} + static DEVICE_ATTR_RW(rng_current); static DEVICE_ATTR_RO(rng_available); static DEVICE_ATTR_RO(rng_selected); +static DEVICE_ATTR_RW(rng_quality); static struct attribute *rng_dev_attrs[] = { &dev_attr_rng_current.attr, &dev_attr_rng_available.attr, &dev_attr_rng_selected.attr, + &dev_attr_rng_quality.attr, NULL }; -- GitLab From 43a4b1fee098bd38eed9c334d0e0df221ecdf719 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Sat, 29 Jan 2022 09:59:22 +0800 Subject: [PATCH 0506/1586] block, bfq: cleanup bfq_bfqq_to_bfqg() Use bfq_group() instead, which do the same thing. 
Signed-off-by: Yu Kuai Reviewed-by: Jan Kara Acked-by: Paolo Valente Link: https://lore.kernel.org/r/20220129015924.3958918-2-yukuai3@huawei.com Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 4 ++-- block/bfq-iosched.h | 1 - block/bfq-wf2q.c | 15 --------------- 3 files changed, 2 insertions(+), 18 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 0c612a9116967..2f2b97cad9802 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -774,7 +774,7 @@ bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq) if (!bfqq->next_rq) return; - bfqq->pos_root = &bfq_bfqq_to_bfqg(bfqq)->rq_pos_tree; + bfqq->pos_root = &bfqq_group(bfqq)->rq_pos_tree; __bfqq = bfq_rq_pos_tree_lookup(bfqd, bfqq->pos_root, blk_rq_pos(bfqq->next_rq), &parent, &p); if (!__bfqq) { @@ -2669,7 +2669,7 @@ static struct bfq_queue *bfqq_find_close(struct bfq_data *bfqd, struct bfq_queue *bfqq, sector_t sector) { - struct rb_root *root = &bfq_bfqq_to_bfqg(bfqq)->rq_pos_tree; + struct rb_root *root = &bfqq_group(bfqq)->rq_pos_tree; struct rb_node *parent, *node; struct bfq_queue *__bfqq; diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 72255ec44f8f8..3b83e3d1c2e58 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -1050,7 +1050,6 @@ extern struct blkcg_policy blkcg_policy_bfq; for (parent = NULL; entity ; entity = parent) #endif /* CONFIG_BFQ_GROUP_IOSCHED */ -struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq); struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity); unsigned int bfq_tot_busy_queues(struct bfq_data *bfqd); struct bfq_service_tree *bfq_entity_service_tree(struct bfq_entity *entity); diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index 709b901de3ca9..f8eb340381cf1 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -142,16 +142,6 @@ static bool bfq_update_next_in_service(struct bfq_sched_data *sd, #ifdef CONFIG_BFQ_GROUP_IOSCHED -struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq) -{ - struct 
bfq_entity *group_entity = bfqq->entity.parent; - - if (!group_entity) - group_entity = &bfqq->bfqd->root_group->entity; - - return container_of(group_entity, struct bfq_group, entity); -} - /* * Returns true if this budget changes may let next_in_service->parent * become the next_in_service entity for its parent entity. @@ -230,11 +220,6 @@ static bool bfq_no_longer_next_in_service(struct bfq_entity *entity) #else /* CONFIG_BFQ_GROUP_IOSCHED */ -struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq) -{ - return bfqq->bfqd->root_group; -} - static bool bfq_update_parent_budget(struct bfq_entity *next_in_service) { return false; -- GitLab From c5e4cb0fcbbaa5ad853818c4a2383e9bd147fad6 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Sat, 29 Jan 2022 09:59:23 +0800 Subject: [PATCH 0507/1586] block, bfq: avoid moving bfqq to it's parent bfqg Moving bfqq to it's parent bfqg is pointless. Signed-off-by: Yu Kuai Link: https://lore.kernel.org/r/20220129015924.3958918-3-yukuai3@huawei.com Signed-off-by: Jens Axboe --- block/bfq-cgroup.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 24a5c5329bcd0..9783c11561591 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -645,6 +645,14 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, struct bfq_group *bfqg) { struct bfq_entity *entity = &bfqq->entity; + struct bfq_group *old_parent = bfqq_group(bfqq); + + /* + * No point to move bfqq to the same group, which can happen when + * root group is offlined + */ + if (old_parent == bfqg) + return; /* * Get extra reference to prevent bfqq from being freed in @@ -666,7 +674,7 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, bfq_deactivate_bfqq(bfqd, bfqq, false, false); else if (entity->on_st_or_in_serv) bfq_put_idle_entity(bfq_entity_service_tree(entity), entity); - bfqg_and_blkg_put(bfqq_group(bfqq)); + bfqg_and_blkg_put(old_parent); if (entity->parent && 
entity->parent->last_bfqq_created == bfqq) -- GitLab From 8410f70977734f21b8ed45c37e925d311dfda2e7 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Sat, 29 Jan 2022 09:59:24 +0800 Subject: [PATCH 0508/1586] block, bfq: don't move oom_bfqq Our test report a UAF: [ 2073.019181] ================================================================== [ 2073.019188] BUG: KASAN: use-after-free in __bfq_put_async_bfqq+0xa0/0x168 [ 2073.019191] Write of size 8 at addr ffff8000ccf64128 by task rmmod/72584 [ 2073.019192] [ 2073.019196] CPU: 0 PID: 72584 Comm: rmmod Kdump: loaded Not tainted 4.19.90-yk #5 [ 2073.019198] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 [ 2073.019200] Call trace: [ 2073.019203] dump_backtrace+0x0/0x310 [ 2073.019206] show_stack+0x28/0x38 [ 2073.019210] dump_stack+0xec/0x15c [ 2073.019216] print_address_description+0x68/0x2d0 [ 2073.019220] kasan_report+0x238/0x2f0 [ 2073.019224] __asan_store8+0x88/0xb0 [ 2073.019229] __bfq_put_async_bfqq+0xa0/0x168 [ 2073.019233] bfq_put_async_queues+0xbc/0x208 [ 2073.019236] bfq_pd_offline+0x178/0x238 [ 2073.019240] blkcg_deactivate_policy+0x1f0/0x420 [ 2073.019244] bfq_exit_queue+0x128/0x178 [ 2073.019249] blk_mq_exit_sched+0x12c/0x160 [ 2073.019252] elevator_exit+0xc8/0xd0 [ 2073.019256] blk_exit_queue+0x50/0x88 [ 2073.019259] blk_cleanup_queue+0x228/0x3d8 [ 2073.019267] null_del_dev+0xfc/0x1e0 [null_blk] [ 2073.019274] null_exit+0x90/0x114 [null_blk] [ 2073.019278] __arm64_sys_delete_module+0x358/0x5a0 [ 2073.019282] el0_svc_common+0xc8/0x320 [ 2073.019287] el0_svc_handler+0xf8/0x160 [ 2073.019290] el0_svc+0x10/0x218 [ 2073.019291] [ 2073.019294] Allocated by task 14163: [ 2073.019301] kasan_kmalloc+0xe0/0x190 [ 2073.019305] kmem_cache_alloc_node_trace+0x1cc/0x418 [ 2073.019308] bfq_pd_alloc+0x54/0x118 [ 2073.019313] blkcg_activate_policy+0x250/0x460 [ 2073.019317] bfq_create_group_hierarchy+0x38/0x110 [ 2073.019321] bfq_init_queue+0x6d0/0x948 [ 2073.019325] blk_mq_init_sched+0x1d8/0x390 [ 2073.019330] 
elevator_switch_mq+0x88/0x170 [ 2073.019334] elevator_switch+0x140/0x270 [ 2073.019338] elv_iosched_store+0x1a4/0x2a0 [ 2073.019342] queue_attr_store+0x90/0xe0 [ 2073.019348] sysfs_kf_write+0xa8/0xe8 [ 2073.019351] kernfs_fop_write+0x1f8/0x378 [ 2073.019359] __vfs_write+0xe0/0x360 [ 2073.019363] vfs_write+0xf0/0x270 [ 2073.019367] ksys_write+0xdc/0x1b8 [ 2073.019371] __arm64_sys_write+0x50/0x60 [ 2073.019375] el0_svc_common+0xc8/0x320 [ 2073.019380] el0_svc_handler+0xf8/0x160 [ 2073.019383] el0_svc+0x10/0x218 [ 2073.019385] [ 2073.019387] Freed by task 72584: [ 2073.019391] __kasan_slab_free+0x120/0x228 [ 2073.019394] kasan_slab_free+0x10/0x18 [ 2073.019397] kfree+0x94/0x368 [ 2073.019400] bfqg_put+0x64/0xb0 [ 2073.019404] bfqg_and_blkg_put+0x90/0xb0 [ 2073.019408] bfq_put_queue+0x220/0x228 [ 2073.019413] __bfq_put_async_bfqq+0x98/0x168 [ 2073.019416] bfq_put_async_queues+0xbc/0x208 [ 2073.019420] bfq_pd_offline+0x178/0x238 [ 2073.019424] blkcg_deactivate_policy+0x1f0/0x420 [ 2073.019429] bfq_exit_queue+0x128/0x178 [ 2073.019433] blk_mq_exit_sched+0x12c/0x160 [ 2073.019437] elevator_exit+0xc8/0xd0 [ 2073.019440] blk_exit_queue+0x50/0x88 [ 2073.019443] blk_cleanup_queue+0x228/0x3d8 [ 2073.019451] null_del_dev+0xfc/0x1e0 [null_blk] [ 2073.019459] null_exit+0x90/0x114 [null_blk] [ 2073.019462] __arm64_sys_delete_module+0x358/0x5a0 [ 2073.019467] el0_svc_common+0xc8/0x320 [ 2073.019471] el0_svc_handler+0xf8/0x160 [ 2073.019474] el0_svc+0x10/0x218 [ 2073.019475] [ 2073.019479] The buggy address belongs to the object at ffff8000ccf63f00 which belongs to the cache kmalloc-1024 of size 1024 [ 2073.019484] The buggy address is located 552 bytes inside of 1024-byte region [ffff8000ccf63f00, ffff8000ccf64300) [ 2073.019486] The buggy address belongs to the page: [ 2073.019492] page:ffff7e000333d800 count:1 mapcount:0 mapping:ffff8000c0003a00 index:0x0 compound_mapcount: 0 [ 2073.020123] flags: 0x7ffff0000008100(slab|head) [ 2073.020403] raw: 07ffff0000008100 ffff7e0003334c08 
ffff7e00001f5a08 ffff8000c0003a00 [ 2073.020409] raw: 0000000000000000 00000000001c001c 00000001ffffffff 0000000000000000 [ 2073.020411] page dumped because: kasan: bad access detected [ 2073.020412] [ 2073.020414] Memory state around the buggy address: [ 2073.020420] ffff8000ccf64000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 2073.020424] ffff8000ccf64080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 2073.020428] >ffff8000ccf64100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 2073.020430] ^ [ 2073.020434] ffff8000ccf64180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 2073.020438] ffff8000ccf64200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 2073.020439] ================================================================== The same problem exist in mainline as well. This is because oom_bfqq is moved to a non-root group, thus root_group is freed earlier. Thus fix the problem by don't move oom_bfqq. Signed-off-by: Yu Kuai Reviewed-by: Jan Kara Acked-by: Paolo Valente Link: https://lore.kernel.org/r/20220129015924.3958918-4-yukuai3@huawei.com Signed-off-by: Jens Axboe --- block/bfq-cgroup.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 9783c11561591..420eda2589c0e 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -654,6 +654,12 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, if (old_parent == bfqg) return; + /* + * oom_bfqq is not allowed to move, oom_bfqq will hold ref to root_group + * until elevator exit. + */ + if (bfqq == &bfqd->oom_bfqq) + return; /* * Get extra reference to prevent bfqq from being freed in * next possible expire or deactivate. 
-- GitLab From 07025ceaac9f4f7a9e1a3285c3216469bf066320 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Thu, 17 Feb 2022 09:00:24 +0800 Subject: [PATCH 0509/1586] spi: clean up some inconsistent indenting Eliminate the following smatch warning: drivers/spi/spi-sunplus-sp7021.c:379 sp7021_spi_slave_transfer_one() warn: inconsistent indenting Reported-by: Abaci Robot Signed-off-by: Yang Li Link: https://lore.kernel.org/r/20220217010024.111904-1-yang.lee@linux.alibaba.com Signed-off-by: Mark Brown --- drivers/spi/spi-sunplus-sp7021.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-sunplus-sp7021.c b/drivers/spi/spi-sunplus-sp7021.c index ade7a0fca8cb3..f989f7b992961 100644 --- a/drivers/spi/spi-sunplus-sp7021.c +++ b/drivers/spi/spi-sunplus-sp7021.c @@ -376,8 +376,8 @@ static int sp7021_spi_slave_transfer_one(struct spi_controller *ctlr, struct spi xfer->len, DMA_TO_DEVICE); if (dma_mapping_error(dev, xfer->tx_dma)) return -ENOMEM; - ret = sp7021_spi_slave_tx(spi, xfer); - dma_unmap_single(dev, xfer->tx_dma, xfer->len, DMA_TO_DEVICE); + ret = sp7021_spi_slave_tx(spi, xfer); + dma_unmap_single(dev, xfer->tx_dma, xfer->len, DMA_TO_DEVICE); } else if (xfer->rx_buf && !xfer->tx_buf) { xfer->rx_dma = dma_map_single(dev, xfer->rx_buf, xfer->len, DMA_FROM_DEVICE); -- GitLab From 043786303b175977e515d4e99cf6b5f886b136dc Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 18 Feb 2022 14:58:35 +0100 Subject: [PATCH 0510/1586] spi: use sysfs_emit() for printing statistics and add trailing newline Use the dedicated function sysfs_emit(), which does some extra checking, e.g. to ensure that no more than PAGESIZE bytes are written. In addition, add a trailing newline to the output, which makes it more readable from the console.
Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/56e1588d-d53b-73e9-fdc8-7fe30bf91f11@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 85f8ae4cc0c0d..cd4dc3131e17f 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -143,7 +143,7 @@ static ssize_t spi_statistics_##name##_show(struct spi_statistics *stat, \ unsigned long flags; \ ssize_t len; \ spin_lock_irqsave(&stat->lock, flags); \ - len = sprintf(buf, format_string, stat->field); \ + len = sysfs_emit(buf, format_string "\n", stat->field); \ spin_unlock_irqrestore(&stat->lock, flags); \ return len; \ } \ -- GitLab From 5ea33af9d430cd1dbfada1b839e0d317ed77bfac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20G=C3=B6ttsche?= Date: Thu, 17 Feb 2022 15:21:29 +0100 Subject: [PATCH 0511/1586] selinux: drop return statement at end of void functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Those return statements at the end of a void function are redundant. 
Reported by clang-tidy [readability-redundant-control-flow] Signed-off-by: Christian Göttsche Signed-off-by: Paul Moore --- security/selinux/hooks.c | 2 -- security/selinux/ss/conditional.c | 2 -- security/selinux/ss/ebitmap.c | 1 - security/selinux/ss/mls.c | 1 - security/selinux/ss/services.c | 2 -- 5 files changed, 8 deletions(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index dafabb4dcc640..1e69f88eb326a 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3284,8 +3284,6 @@ static void selinux_inode_post_setxattr(struct dentry *dentry, const char *name, isec->sid = newsid; isec->initialized = LABEL_INITIALIZED; spin_unlock(&isec->lock); - - return; } static int selinux_inode_getxattr(struct dentry *dentry, const char *name) diff --git a/security/selinux/ss/conditional.c b/security/selinux/ss/conditional.c index 2ec6e5cd25d9b..c46c419af512e 100644 --- a/security/selinux/ss/conditional.c +++ b/security/selinux/ss/conditional.c @@ -566,8 +566,6 @@ void cond_compute_xperms(struct avtab *ctab, struct avtab_key *key, if (node->key.specified & AVTAB_ENABLED) services_compute_xperms_decision(xpermd, node); } - return; - } /* Determine whether additional permissions are granted by the conditional * av table, and if so, add them to the result diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c index 61fcbb8d0f880..abde349c83217 100644 --- a/security/selinux/ss/ebitmap.c +++ b/security/selinux/ss/ebitmap.c @@ -359,7 +359,6 @@ void ebitmap_destroy(struct ebitmap *e) e->highbit = 0; e->node = NULL; - return; } int ebitmap_read(struct ebitmap *e, void *fp) diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c index 3f5fd124342c9..99571b19d4a9e 100644 --- a/security/selinux/ss/mls.c +++ b/security/selinux/ss/mls.c @@ -156,7 +156,6 @@ void mls_sid_to_context(struct policydb *p, } *scontext = scontextp; - return; } int mls_level_isvalid(struct policydb *p, struct mls_level *l) diff --git 
a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 2f8db93e53b29..6901dc07680de 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -529,8 +529,6 @@ out: /* release scontext/tcontext */ kfree(tcontext_name); kfree(scontext_name); - - return; } /* -- GitLab From b97df7c098c531010e445da88d02b7bf7bf59ef6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20G=C3=B6ttsche?= Date: Thu, 17 Feb 2022 15:21:25 +0100 Subject: [PATCH 0512/1586] selinux: use correct type for context length MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit security_sid_to_context() expects a pointer to an u32 as the address where to store the length of the computed context. Reported by sparse: security/selinux/xfrm.c:359:39: warning: incorrect type in arg 4 (different signedness) security/selinux/xfrm.c:359:39: expected unsigned int [usertype] *scontext_len security/selinux/xfrm.c:359:39: got int * Signed-off-by: Christian Göttsche [PM: wrapped commit description] Signed-off-by: Paul Moore --- security/selinux/xfrm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index 90697317895fb..c576832febc67 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -347,7 +347,7 @@ int selinux_xfrm_state_alloc_acquire(struct xfrm_state *x, int rc; struct xfrm_sec_ctx *ctx; char *ctx_str = NULL; - int str_len; + u32 str_len; if (!polsec) return 0; -- GitLab From ad2f3b08d1ab17e481df753aae221f085fe1cc8a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 10 Feb 2022 21:05:33 +0100 Subject: [PATCH 0513/1586] ACPI: scan: Use ida_alloc() instead of ida_simple_get() As recommended in include/linux/idr.h, use ida_alloc() instead of ida_simple_get() for creating unique device object names and for symmetry replace ida_simple_remove() with ida_free() (and fix up the related overly long code line while at it). 
Also drop the ACPI_MAX_DEVICE_INSTANCES limit that is not necessary any more and may not be sufficient for future platforms. Signed-off-by: Rafael J. Wysocki Reviewed-by: Andy Shevchenko --- drivers/acpi/internal.h | 2 -- drivers/acpi/scan.c | 5 +++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 457e11d851b8d..628bf8f181304 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -96,8 +96,6 @@ void acpi_scan_table_notify(void); extern struct list_head acpi_bus_id_list; -#define ACPI_MAX_DEVICE_INSTANCES 4096 - struct acpi_device_bus_id { const char *bus_id; struct ida instance_ida; diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 1331756d4cfce..5fa34f5316f11 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -477,7 +477,8 @@ static void acpi_device_del(struct acpi_device *device) list_for_each_entry(acpi_device_bus_id, &acpi_bus_id_list, node) if (!strcmp(acpi_device_bus_id->bus_id, acpi_device_hid(device))) { - ida_simple_remove(&acpi_device_bus_id->instance_ida, device->pnp.instance_no); + ida_free(&acpi_device_bus_id->instance_ida, + device->pnp.instance_no); if (ida_is_empty(&acpi_device_bus_id->instance_ida)) { list_del(&acpi_device_bus_id->node); kfree_const(acpi_device_bus_id->bus_id); @@ -642,7 +643,7 @@ static int acpi_device_set_name(struct acpi_device *device, struct ida *instance_ida = &acpi_device_bus_id->instance_ida; int result; - result = ida_simple_get(instance_ida, 0, ACPI_MAX_DEVICE_INSTANCES, GFP_KERNEL); + result = ida_alloc(instance_ida, GFP_KERNEL); if (result < 0) return result; -- GitLab From 9978f446d406a7dae080ca4a682bac927c9b8773 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 9 Feb 2022 16:53:18 +0200 Subject: [PATCH 0514/1586] ACPI: property: Get rid of redundant 'else' In the snippets like the following if (...) return / goto / break / continue ...; else ... the 'else' is redundant. Get rid of it. 
While at it, replace conditional '<= 0' for unsigned type by '== 0' in acpi_data_prop_read(); update comment in the __acpi_node_get_property_reference() on how we parse the reference. Signed-off-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- drivers/acpi/property.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index 3fceb4681ec9f..12bbfe8336095 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -541,7 +541,8 @@ acpi_device_data_of_node(const struct fwnode_handle *fwnode) if (is_acpi_device_node(fwnode)) { const struct acpi_device *adev = to_acpi_device_node(fwnode); return &adev->data; - } else if (is_acpi_data_node(fwnode)) { + } + if (is_acpi_data_node(fwnode)) { const struct acpi_data_node *dn = to_acpi_data_node(fwnode); return &dn->data; } @@ -739,14 +740,19 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, return -EINVAL; } - /* assume following integer elements are all args */ + /* + * Assume the following integer elements are all args. + * Stop counting on the first reference or end of the + * package arguments. In case of neither reference, + * nor integer, return an error, we can't parse it. 
+ */ for (i = 0; element + i < end && i < num_args; i++) { int type = element[i].type; + if (type == ACPI_TYPE_LOCAL_REFERENCE) + break; if (type == ACPI_TYPE_INTEGER) nargs++; - else if (type == ACPI_TYPE_LOCAL_REFERENCE) - break; else return -EINVAL; } @@ -950,7 +956,7 @@ static int acpi_data_prop_read(const struct acpi_device_data *data, if (proptype != DEV_PROP_STRING && nval > obj->package.count) return -EOVERFLOW; - else if (nval <= 0) + if (nval == 0) return -EINVAL; items = obj->package.elements; @@ -1012,14 +1018,10 @@ struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode, const struct list_head *head; struct list_head *next; - if (!child || is_acpi_device_node(child)) { + if ((!child || is_acpi_device_node(child)) && adev) { struct acpi_device *child_adev; - if (adev) - head = &adev->children; - else - goto nondev; - + head = &adev->children; if (list_empty(head)) goto nondev; @@ -1089,7 +1091,8 @@ acpi_node_get_parent(const struct fwnode_handle *fwnode) if (is_acpi_data_node(fwnode)) { /* All data nodes have parent pointer so just return that */ return to_acpi_data_node(fwnode)->parent; - } else if (is_acpi_device_node(fwnode)) { + } + if (is_acpi_device_node(fwnode)) { struct device *dev = to_acpi_device_node(fwnode)->dev.parent; if (dev) -- GitLab From 509853f9e1e7b1490dc79f735a5dbafc9298f40d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 11 Feb 2022 19:14:54 +0100 Subject: [PATCH 0515/1586] genirq: Provide generic_handle_irq_safe() Provide generic_handle_irq_safe() which can be used from any context.
Suggested-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Reviewed-by: Hans de Goede Reviewed-by: Oleksandr Natalenko Reviewed-by: Wolfram Sang Link: https://lore.kernel.org/r/20220211181500.1856198-2-bigeasy@linutronix.de --- include/linux/irqdesc.h | 1 + kernel/irq/irqdesc.c | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 93d270ca0c567..a77584593f7d1 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -160,6 +160,7 @@ static inline void generic_handle_irq_desc(struct irq_desc *desc) int handle_irq_desc(struct irq_desc *desc); int generic_handle_irq(unsigned int irq); +int generic_handle_irq_safe(unsigned int irq); #ifdef CONFIG_IRQ_DOMAIN /* diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 2267e6527db3c..346d283d2da14 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -662,6 +662,29 @@ int generic_handle_irq(unsigned int irq) } EXPORT_SYMBOL_GPL(generic_handle_irq); +/** + * generic_handle_irq_safe - Invoke the handler for a particular irq from any + * context. + * @irq: The irq number to handle + * + * Returns: 0 on success, a negative value on error. + * + * This function can be called from any context (IRQ or process context). It + * will report an error if not invoked from IRQ context and the irq has been + * marked to enforce IRQ-context only. 
+ */ +int generic_handle_irq_safe(unsigned int irq) +{ + unsigned long flags; + int ret; + + local_irq_save(flags); + ret = handle_irq_desc(irq_to_desc(irq)); + local_irq_restore(flags); + return ret; +} +EXPORT_SYMBOL_GPL(generic_handle_irq_safe); + #ifdef CONFIG_IRQ_DOMAIN /** * generic_handle_domain_irq - Invoke the handler for a HW irq belonging -- GitLab From 91d7b75a5888c1824c87bded439db6e3e3ad7e3c Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Fri, 21 Jan 2022 21:07:34 +0100 Subject: [PATCH 0516/1586] m68k: Add asm/config.h To avoid 'warning: no previous prototype for' errors, declare all the parse_bootinfo and config function prototypes into asm/config.h and include it in arch/m68k/kernel/setup_mm.c and arch/m68k/*/config.c. Signed-off-by: Laurent Vivier Link: https://lore.kernel.org/r/20220121200738.2577697-2-laurent@vivier.eu Signed-off-by: Geert Uytterhoeven --- arch/m68k/amiga/config.c | 1 + arch/m68k/apollo/config.c | 1 + arch/m68k/atari/config.c | 1 + arch/m68k/bvme6000/config.c | 1 + arch/m68k/hp300/config.c | 1 + arch/m68k/include/asm/config.h | 33 +++++++++++++++++++++++++++++++++ arch/m68k/kernel/setup_mm.c | 23 +---------------------- arch/m68k/mac/config.c | 1 + arch/m68k/mvme147/config.c | 1 + arch/m68k/mvme16x/config.c | 1 + arch/m68k/q40/config.c | 1 + 11 files changed, 43 insertions(+), 22 deletions(-) create mode 100644 arch/m68k/include/asm/config.h diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c index be2dfab48fd42..3137b45750dfc 100644 --- a/arch/m68k/amiga/config.c +++ b/arch/m68k/amiga/config.c @@ -37,6 +37,7 @@ #include #include #include +#include static unsigned long amiga_model; diff --git a/arch/m68k/apollo/config.c b/arch/m68k/apollo/config.c index 581a5f68d1029..42a8b8e2b6642 100644 --- a/arch/m68k/apollo/config.c +++ b/arch/m68k/apollo/config.c @@ -16,6 +16,7 @@ #include #include #include +#include u_long sio01_physaddr; u_long sio23_physaddr; diff --git a/arch/m68k/atari/config.c b/arch/m68k/atari/config.c 
index 261a0f57cc9ac..38a7c05781059 100644 --- a/arch/m68k/atari/config.c +++ b/arch/m68k/atari/config.c @@ -46,6 +46,7 @@ #include #include #include +#include u_long atari_mch_cookie; EXPORT_SYMBOL(atari_mch_cookie); diff --git a/arch/m68k/bvme6000/config.c b/arch/m68k/bvme6000/config.c index 0c6feafbbd110..9b060d466e036 100644 --- a/arch/m68k/bvme6000/config.c +++ b/arch/m68k/bvme6000/config.c @@ -36,6 +36,7 @@ #include #include #include +#include static void bvme6000_get_model(char *model); extern void bvme6000_sched_init(void); diff --git a/arch/m68k/hp300/config.c b/arch/m68k/hp300/config.c index ce1eb3d3d55d2..2c92843397c34 100644 --- a/arch/m68k/hp300/config.c +++ b/arch/m68k/hp300/config.c @@ -22,6 +22,7 @@ #include #include /* readb() and writeb() */ #include +#include #include "time.h" diff --git a/arch/m68k/include/asm/config.h b/arch/m68k/include/asm/config.h new file mode 100644 index 0000000000000..e73ffa23c4f56 --- /dev/null +++ b/arch/m68k/include/asm/config.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * This file contains prototypes provided by each m68k machine + * to parse bootinfo data structures and to configure the machine + */ + +#ifndef _M68K_CONFIG_H +#define _M68K_CONFIG_H + +extern int amiga_parse_bootinfo(const struct bi_record *record); +extern int apollo_parse_bootinfo(const struct bi_record *record); +extern int atari_parse_bootinfo(const struct bi_record *record); +extern int bvme6000_parse_bootinfo(const struct bi_record *record); +extern int hp300_parse_bootinfo(const struct bi_record *record); +extern int mac_parse_bootinfo(const struct bi_record *record); +extern int mvme147_parse_bootinfo(const struct bi_record *record); +extern int mvme16x_parse_bootinfo(const struct bi_record *record); +extern int q40_parse_bootinfo(const struct bi_record *record); + +extern void config_amiga(void); +extern void config_apollo(void); +extern void config_atari(void); +extern void config_bvme6000(void); +extern void 
config_hp300(void); +extern void config_mac(void); +extern void config_mvme147(void); +extern void config_mvme16x(void); +extern void config_q40(void); +extern void config_sun3(void); +extern void config_sun3x(void); + +#endif /* _M68K_CONFIG_H */ diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c index 49e573b943268..8228275aae3e8 100644 --- a/arch/m68k/kernel/setup_mm.c +++ b/arch/m68k/kernel/setup_mm.c @@ -47,6 +47,7 @@ #endif #include #include +#include #if !FPSTATESIZE || !NR_IRQS #warning No CPU/platform type selected, your kernel will not work! @@ -113,28 +114,6 @@ EXPORT_SYMBOL(isa_type); EXPORT_SYMBOL(isa_sex); #endif -extern int amiga_parse_bootinfo(const struct bi_record *); -extern int atari_parse_bootinfo(const struct bi_record *); -extern int mac_parse_bootinfo(const struct bi_record *); -extern int q40_parse_bootinfo(const struct bi_record *); -extern int bvme6000_parse_bootinfo(const struct bi_record *); -extern int mvme16x_parse_bootinfo(const struct bi_record *); -extern int mvme147_parse_bootinfo(const struct bi_record *); -extern int hp300_parse_bootinfo(const struct bi_record *); -extern int apollo_parse_bootinfo(const struct bi_record *); - -extern void config_amiga(void); -extern void config_atari(void); -extern void config_mac(void); -extern void config_sun3(void); -extern void config_apollo(void); -extern void config_mvme147(void); -extern void config_mvme16x(void); -extern void config_bvme6000(void); -extern void config_hp300(void); -extern void config_q40(void); -extern void config_sun3x(void); - #define MASK_256K 0xfffc0000 extern void paging_init(void); diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c index 5d16f9b47aa90..65d124ec80bb6 100644 --- a/arch/m68k/mac/config.c +++ b/arch/m68k/mac/config.c @@ -47,6 +47,7 @@ #include #include #include +#include /* Mac bootinfo struct */ struct mac_booter_data mac_bi_data; diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c index 
dfd6202fd403e..b96ea7c76a197 100644 --- a/arch/m68k/mvme147/config.c +++ b/arch/m68k/mvme147/config.c @@ -34,6 +34,7 @@ #include #include #include +#include static void mvme147_get_model(char *model); diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c index b4422c2dfbbf4..88cbdc10925b9 100644 --- a/arch/m68k/mvme16x/config.c +++ b/arch/m68k/mvme16x/config.c @@ -37,6 +37,7 @@ #include #include #include +#include extern t_bdid mvme_bdid; diff --git a/arch/m68k/q40/config.c b/arch/m68k/q40/config.c index 5caf1e5be1c2b..9237243077cee 100644 --- a/arch/m68k/q40/config.c +++ b/arch/m68k/q40/config.c @@ -34,6 +34,7 @@ #include #include #include +#include extern void q40_init_IRQ(void); static void q40_get_model(char *model); -- GitLab From c4d5b6eef2581dd893bea4edb374e480e5a90ce6 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 28 Jan 2022 18:30:06 +0100 Subject: [PATCH 0517/1586] m68k: mm: Remove check for VM_IO to fix deferred I/O When an application accesses a mapped frame buffer backed by deferred I/O, it receives a segmentation fault. Fix this by removing the check for VM_IO in do_page_fault(). 
Signed-off-by: Geert Uytterhoeven Tested-by: Michael Schmitz Link: https://lore.kernel.org/r/20220128173006.1713210-1-geert@linux-m68k.org --- arch/m68k/mm/fault.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index 1493cf5eac1e7..71aa9f6315dc8 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c @@ -93,8 +93,6 @@ retry: vma = find_vma(mm, address); if (!vma) goto map_err; - if (vma->vm_flags & VM_IO) - goto acc_err; if (vma->vm_start <= address) goto good_area; if (!(vma->vm_flags & VM_GROWSDOWN)) -- GitLab From b252ada293d5d30566121c61fa7552e74396d533 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 26 Jan 2022 12:26:05 +0100 Subject: [PATCH 0518/1586] dt-bindings: mtd: spi-nor: Allow two CS per device The Xilinx QSPI controller has two advanced modes which allow the controller to behave differently and consider two flashes as one single storage. One of these two modes is quite complex to support from a binding point of view and is the dual parallel memories. In this mode, each byte of data is stored in both devices: the even bits in one, the odd bits in the other. The split is automatically handled by the QSPI controller and is transparent for the user. The other mode is simpler to support, it is called dual stacked memories. The controller shares the same SPI bus but each of the devices contain half of the data. Once in this mode, the controller does not follow CS requests but instead internally wires the two CS levels with the value of the most significant address bit. 
Supporting these two modes will involve core changes which include the possibility of providing two CS for a single SPI device. Signed-off-by: Miquel Raynal Acked-by: Rob Herring Reviewed-by: Pratyush Yadav Link: https://lore.kernel.org/r/20220126112608.955728-2-miquel.raynal@bootlin.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml index 39421f7233e4c..4abfb4cfc1571 100644 --- a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml +++ b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml @@ -47,7 +47,8 @@ properties: identified by the JEDEC READ ID opcode (0x9F). reg: - maxItems: 1 + minItems: 1 + maxItems: 2 spi-max-frequency: true spi-rx-bus-width: true -- GitLab From e2edd1b64f1c79e8abda365149ed62a2a9a494b4 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 26 Jan 2022 12:26:06 +0100 Subject: [PATCH 0519/1586] spi: dt-bindings: Describe stacked/parallel memories modes Describe two new memories modes: - A stacked mode when the bus is common but the address space is extended with additional wires. - A parallel mode with parallel buses accessing parallel flashes where the data is spread.
Signed-off-by: Miquel Raynal Acked-by: Pratyush Yadav Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20220126112608.955728-3-miquel.raynal@bootlin.com Signed-off-by: Mark Brown --- .../bindings/spi/spi-peripheral-props.yaml | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml b/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml index 5dd209206e880..fedb7ae98ff6f 100644 --- a/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml +++ b/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml @@ -82,6 +82,31 @@ properties: description: Delay, in microseconds, after a write transfer. + stacked-memories: + description: Several SPI memories can be wired in stacked mode. + This basically means that either a device features several chip + selects, or that different devices must be seen as a single + bigger chip. This basically doubles (or more) the total address + space with only a single additional wire, while still needing + to repeat the commands when crossing a chip boundary. The size of + each chip should be provided as members of the array. + $ref: /schemas/types.yaml#/definitions/uint64-array + minItems: 2 + maxItems: 4 + + parallel-memories: + description: Several SPI memories can be wired in parallel mode. + The devices are physically on a different buses but will always + act synchronously as each data word is spread across the + different memories (eg. even bits are stored in one memory, odd + bits in the other). This basically doubles the address space and + the throughput while greatly complexifying the wiring because as + many busses as devices must be wired. The size of each chip should + be provided as members of the array. + $ref: /schemas/types.yaml#/definitions/uint64-array + minItems: 2 + maxItems: 4 + # The controller specific properties go here. 
allOf: - $ref: cdns,qspi-nor-peripheral-props.yaml# -- GitLab From eba5368503b4291db7819512600fa014ea17c5a8 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 26 Jan 2022 12:26:07 +0100 Subject: [PATCH 0520/1586] spi: dt-bindings: Add an example with two stacked flashes Provide an example of how to describe two flashes in eg. stacked mode. Signed-off-by: Miquel Raynal Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20220126112608.955728-4-miquel.raynal@bootlin.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/spi/spi-controller.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Documentation/devicetree/bindings/spi/spi-controller.yaml b/Documentation/devicetree/bindings/spi/spi-controller.yaml index 36b72518f5654..0f4d40218400e 100644 --- a/Documentation/devicetree/bindings/spi/spi-controller.yaml +++ b/Documentation/devicetree/bindings/spi/spi-controller.yaml @@ -139,4 +139,11 @@ examples: spi-max-frequency = <100000>; reg = <1>; }; + + flash@2 { + compatible = "jedec,spi-nor"; + spi-max-frequency = <50000000>; + reg = <2>, <3>; + stacked-memories = /bits/ 64 <0x10000000 0x10000000>; + }; }; -- GitLab From c5a3106aa4923bec979c2a76667a493cb5d134fd Mon Sep 17 00:00:00 2001 From: "Minghao Chi (CGEL ZTE)" Date: Mon, 21 Feb 2022 02:02:33 +0000 Subject: [PATCH 0521/1586] spi: Use of_device_get_match_data() Use of_device_get_match_data() to simplify the code. 
Reported-by: Zeal Robot Signed-off-by: Minghao Chi (CGEL ZTE) Link: https://lore.kernel.org/r/20220221020233.1925154-1-chi.minghao@zte.com.cn Signed-off-by: Mark Brown --- drivers/spi/spi-lantiq-ssc.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/spi/spi-lantiq-ssc.c b/drivers/spi/spi-lantiq-ssc.c index bcb52601804a9..aae26f62ea87a 100644 --- a/drivers/spi/spi-lantiq-ssc.c +++ b/drivers/spi/spi-lantiq-ssc.c @@ -906,17 +906,11 @@ static int lantiq_ssc_probe(struct platform_device *pdev) struct spi_master *master; struct lantiq_ssc_spi *spi; const struct lantiq_ssc_hwcfg *hwcfg; - const struct of_device_id *match; u32 id, supports_dma, revision; unsigned int num_cs; int err; - match = of_match_device(lantiq_ssc_match, dev); - if (!match) { - dev_err(dev, "no device match\n"); - return -EINVAL; - } - hwcfg = match->data; + hwcfg = of_device_get_match_data(dev); master = spi_alloc_master(dev, sizeof(struct lantiq_ssc_spi)); if (!master) -- GitLab From 6e8ec2552c7d13991148e551e3325a624d73fac6 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 16 Jan 2022 14:23:10 +0100 Subject: [PATCH 0522/1586] random: use computational hash for entropy extraction The current 4096-bit LFSR used for entropy collection had a few desirable attributes for the context in which it was created. For example, the state was huge, which meant that /dev/random would be able to output quite a bit of accumulated entropy before blocking. It was also, in its time, quite fast at accumulating entropy byte-by-byte, which matters given the varying contexts in which mix_pool_bytes() is called. And its diffusion was relatively high, which meant that changes would ripple across several words of state rather quickly. However, it also suffers from a few security vulnerabilities. In particular, inputs learned by an attacker can be undone, but moreover, if the state of the pool leaks, its contents can be controlled and entirely zeroed out. 
I've demonstrated this attack with this SMT2 script, , which Boolector/CaDiCal solves in a matter of seconds on a single core of my laptop, resulting in little proof of concept C demonstrators such as . For basically all recent formal models of RNGs, these attacks represent a significant cryptographic flaw. But how does this manifest practically? If an attacker has access to the system to such a degree that he can learn the internal state of the RNG, arguably there are other lower hanging vulnerabilities -- side-channel, infoleak, or otherwise -- that might have higher priority. On the other hand, seed files are frequently used on systems that have a hard time generating much entropy on their own, and these seed files, being files, often leak or are duplicated and distributed accidentally, or are even seeded over the Internet intentionally, where their contents might be recorded or tampered with. Seen this way, an otherwise quasi-implausible vulnerability is a bit more practical than initially thought. Another aspect of the current mix_pool_bytes() function is that, while its performance was arguably competitive for the time in which it was created, it's no longer considered so. This patch improves performance significantly: on a high-end CPU, an i7-11850H, it improves performance of mix_pool_bytes() by 225%, and on a low-end CPU, a Cortex-A7, it improves performance by 103%. This commit replaces the LFSR of mix_pool_bytes() with a straight- forward cryptographic hash function, BLAKE2s, which is already in use for pool extraction. Universal hashing with a secret seed was considered too, something along the lines of , but the requirement for a secret seed makes for a chicken & egg problem. Instead we go with a formally proven scheme using a computational hash function, described in sections 5.1, 6.4, and B.1.8 of . 
BLAKE2s outputs 256 bits, which should give us an appropriate amount of min-entropy accumulation, and a wide enough margin of collision resistance against active attacks. mix_pool_bytes() becomes a simple call to blake2s_update(), for accumulation, while the extraction step becomes a blake2s_final() to generate a seed, with which we can then do a HKDF-like or BLAKE2X-like expansion, the first part of which we fold back as an init key for subsequent blake2s_update()s, and the rest we produce to the caller. This then is provided to our CRNG like usual. In that expansion step, we make opportunistic use of 32 bytes of RDRAND output, just as before. We also always reseed the crng with 32 bytes, unconditionally, or not at all, rather than sometimes with 16 as before, as we don't win anything by limiting beyond the 16 byte threshold. Going for a hash function as an entropy collector is a conservative, proven approach. The result of all this is a much simpler and much less bespoke construction than what's there now, which not only plugs a vulnerability but also improves performance considerably. Cc: Theodore Ts'o Cc: Dominik Brodowski Reviewed-by: Eric Biggers Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jean-Philippe Aumasson Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 304 ++++++++---------------------------------- 1 file changed, 55 insertions(+), 249 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 3404a91edf292..882f78829a24c 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -42,61 +42,6 @@ */ /* - * (now, with legal B.S. out of the way.....) - * - * This routine gathers environmental noise from device drivers, etc., - * and returns good random numbers, suitable for cryptographic use. 
- * Besides the obvious cryptographic uses, these numbers are also good - * for seeding TCP sequence numbers, and other places where it is - * desirable to have numbers which are not only random, but hard to - * predict by an attacker. - * - * Theory of operation - * =================== - * - * Computers are very predictable devices. Hence it is extremely hard - * to produce truly random numbers on a computer --- as opposed to - * pseudo-random numbers, which can easily generated by using a - * algorithm. Unfortunately, it is very easy for attackers to guess - * the sequence of pseudo-random number generators, and for some - * applications this is not acceptable. So instead, we must try to - * gather "environmental noise" from the computer's environment, which - * must be hard for outside attackers to observe, and use that to - * generate random numbers. In a Unix environment, this is best done - * from inside the kernel. - * - * Sources of randomness from the environment include inter-keyboard - * timings, inter-interrupt timings from some interrupts, and other - * events which are both (a) non-deterministic and (b) hard for an - * outside observer to measure. Randomness from these sources are - * added to an "entropy pool", which is mixed using a CRC-like function. - * This is not cryptographically strong, but it is adequate assuming - * the randomness is not chosen maliciously, and it is fast enough that - * the overhead of doing it on every interrupt is very reasonable. - * As random bytes are mixed into the entropy pool, the routines keep - * an *estimate* of how many bits of randomness have been stored into - * the random number generator's internal state. - * - * When random bytes are desired, they are obtained by taking the BLAKE2s - * hash of the contents of the "entropy pool". The BLAKE2s hash avoids - * exposing the internal state of the entropy pool. 
It is believed to - * be computationally infeasible to derive any useful information - * about the input of BLAKE2s from its output. Even if it is possible to - * analyze BLAKE2s in some clever way, as long as the amount of data - * returned from the generator is less than the inherent entropy in - * the pool, the output data is totally unpredictable. For this - * reason, the routine decreases its internal estimate of how many - * bits of "true randomness" are contained in the entropy pool as it - * outputs random numbers. - * - * If this estimate goes to zero, the routine can still generate - * random numbers; however, an attacker may (at least in theory) be - * able to infer the future output of the generator from prior - * outputs. This requires successful cryptanalysis of BLAKE2s, which is - * not believed to be feasible, but there is a remote possibility. - * Nonetheless, these numbers should be useful for the vast majority - * of purposes. - * * Exported interfaces ---- output * =============================== * @@ -298,23 +243,6 @@ * * mknod /dev/random c 1 8 * mknod /dev/urandom c 1 9 - * - * Acknowledgements: - * ================= - * - * Ideas for constructing this random number generator were derived - * from Pretty Good Privacy's random number generator, and from private - * discussions with Phil Karn. Colin Plumb provided a faster random - * number generator, which speed up the mixing function of the entropy - * pool, taken from PGPfone. Dale Worley has also contributed many - * useful ideas and suggestions to improve this driver. - * - * Any flaws in the design are solely my responsibility, and should - * not be attributed to the Phil, Colin, or any of authors of PGP. - * - * Further background information on this topic may be obtained from - * RFC 1750, "Randomness Recommendations for Security", by Donald - * Eastlake, Steve Crocker, and Jeff Schiller. 
*/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -358,79 +286,15 @@ /* #define ADD_INTERRUPT_BENCH */ -/* - * If the entropy count falls under this number of bits, then we - * should wake up processes which are selecting or polling on write - * access to /dev/random. - */ -static int random_write_wakeup_bits = 28 * (1 << 5); - -/* - * Originally, we used a primitive polynomial of degree .poolwords - * over GF(2). The taps for various sizes are defined below. They - * were chosen to be evenly spaced except for the last tap, which is 1 - * to get the twisting happening as fast as possible. - * - * For the purposes of better mixing, we use the CRC-32 polynomial as - * well to make a (modified) twisted Generalized Feedback Shift - * Register. (See M. Matsumoto & Y. Kurita, 1992. Twisted GFSR - * generators. ACM Transactions on Modeling and Computer Simulation - * 2(3):179-194. Also see M. Matsumoto & Y. Kurita, 1994. Twisted - * GFSR generators II. ACM Transactions on Modeling and Computer - * Simulation 4:254-266) - * - * Thanks to Colin Plumb for suggesting this. - * - * The mixing operation is much less sensitive than the output hash, - * where we use BLAKE2s. All that we want of mixing operation is that - * it be a good non-cryptographic hash; i.e. it not produce collisions - * when fed "random" data of the sort we expect to see. As long as - * the pool state differs for different inputs, we have preserved the - * input entropy and done a good job. The fact that an intelligent - * attacker can construct inputs that will produce controlled - * alterations to the pool's state is not important because we don't - * consider such inputs to contribute any randomness. The only - * property we need with respect to them is that the attacker can't - * increase his/her knowledge of the pool's state. 
Since all - * additions are reversible (knowing the final state and the input, - * you can reconstruct the initial state), if an attacker has any - * uncertainty about the initial state, he/she can only shuffle that - * uncertainty about, but never cause any collisions (which would - * decrease the uncertainty). - * - * Our mixing functions were analyzed by Lacharme, Roeck, Strubel, and - * Videau in their paper, "The Linux Pseudorandom Number Generator - * Revisited" (see: http://eprint.iacr.org/2012/251.pdf). In their - * paper, they point out that we are not using a true Twisted GFSR, - * since Matsumoto & Kurita used a trinomial feedback polynomial (that - * is, with only three taps, instead of the six that we are using). - * As a result, the resulting polynomial is neither primitive nor - * irreducible, and hence does not have a maximal period over - * GF(2**32). They suggest a slight change to the generator - * polynomial which improves the resulting TGFSR polynomial to be - * irreducible, which we have made here. - */ enum poolinfo { - POOL_WORDS = 128, - POOL_WORDMASK = POOL_WORDS - 1, - POOL_BYTES = POOL_WORDS * sizeof(u32), - POOL_BITS = POOL_BYTES * 8, + POOL_BITS = BLAKE2S_HASH_SIZE * 8, POOL_BITSHIFT = ilog2(POOL_BITS), /* To allow fractional bits to be tracked, the entropy_count field is * denominated in units of 1/8th bits. 
*/ POOL_ENTROPY_SHIFT = 3, #define POOL_ENTROPY_BITS() (input_pool.entropy_count >> POOL_ENTROPY_SHIFT) - POOL_FRACBITS = POOL_BITS << POOL_ENTROPY_SHIFT, - - /* x^128 + x^104 + x^76 + x^51 +x^25 + x + 1 */ - POOL_TAP1 = 104, - POOL_TAP2 = 76, - POOL_TAP3 = 51, - POOL_TAP4 = 25, - POOL_TAP5 = 1, - - EXTRACT_SIZE = BLAKE2S_HASH_SIZE / 2 + POOL_FRACBITS = POOL_BITS << POOL_ENTROPY_SHIFT }; /* @@ -438,6 +302,12 @@ enum poolinfo { */ static DECLARE_WAIT_QUEUE_HEAD(random_write_wait); static struct fasync_struct *fasync; +/* + * If the entropy count falls under this number of bits, then we + * should wake up processes which are selecting or polling on write + * access to /dev/random. + */ +static int random_write_wakeup_bits = POOL_BITS * 3 / 4; static DEFINE_SPINLOCK(random_ready_list_lock); static LIST_HEAD(random_ready_list); @@ -493,73 +363,31 @@ MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression"); * **********************************************************************/ -static u32 input_pool_data[POOL_WORDS] __latent_entropy; - static struct { + struct blake2s_state hash; spinlock_t lock; - u16 add_ptr; - u16 input_rotate; int entropy_count; } input_pool = { + .hash.h = { BLAKE2S_IV0 ^ (0x01010000 | BLAKE2S_HASH_SIZE), + BLAKE2S_IV1, BLAKE2S_IV2, BLAKE2S_IV3, BLAKE2S_IV4, + BLAKE2S_IV5, BLAKE2S_IV6, BLAKE2S_IV7 }, + .hash.outlen = BLAKE2S_HASH_SIZE, .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock), }; -static ssize_t extract_entropy(void *buf, size_t nbytes, int min); -static ssize_t _extract_entropy(void *buf, size_t nbytes); +static bool extract_entropy(void *buf, size_t nbytes, int min); +static void _extract_entropy(void *buf, size_t nbytes); static void crng_reseed(struct crng_state *crng, bool use_input_pool); -static const u32 twist_table[8] = { - 0x00000000, 0x3b6e20c8, 0x76dc4190, 0x4db26158, - 0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 }; - /* * This function adds bytes into the entropy "pool". 
It does not * update the entropy estimate. The caller should call * credit_entropy_bits if this is appropriate. - * - * The pool is stirred with a primitive polynomial of the appropriate - * degree, and then twisted. We twist by three bits at a time because - * it's cheap to do so and helps slightly in the expected case where - * the entropy is concentrated in the low-order bits. */ static void _mix_pool_bytes(const void *in, int nbytes) { - unsigned long i; - int input_rotate; - const u8 *bytes = in; - u32 w; - - input_rotate = input_pool.input_rotate; - i = input_pool.add_ptr; - - /* mix one byte at a time to simplify size handling and churn faster */ - while (nbytes--) { - w = rol32(*bytes++, input_rotate); - i = (i - 1) & POOL_WORDMASK; - - /* XOR in the various taps */ - w ^= input_pool_data[i]; - w ^= input_pool_data[(i + POOL_TAP1) & POOL_WORDMASK]; - w ^= input_pool_data[(i + POOL_TAP2) & POOL_WORDMASK]; - w ^= input_pool_data[(i + POOL_TAP3) & POOL_WORDMASK]; - w ^= input_pool_data[(i + POOL_TAP4) & POOL_WORDMASK]; - w ^= input_pool_data[(i + POOL_TAP5) & POOL_WORDMASK]; - - /* Mix the result back in with a twist */ - input_pool_data[i] = (w >> 3) ^ twist_table[w & 7]; - - /* - * Normally, we add 7 bits of rotation to the pool. - * At the beginning of the pool, add an extra 7 bits - * rotation, so that successive passes spread the - * input bits across the pool evenly. - */ - input_rotate = (input_rotate + (i ? 
7 : 14)) & 31; - } - - input_pool.input_rotate = input_rotate; - input_pool.add_ptr = i; + blake2s_update(&input_pool.hash, in, nbytes); } static void __mix_pool_bytes(const void *in, int nbytes) @@ -953,15 +781,14 @@ static int crng_slow_load(const u8 *cp, size_t len) static void crng_reseed(struct crng_state *crng, bool use_input_pool) { unsigned long flags; - int i, num; + int i; union { u8 block[CHACHA_BLOCK_SIZE]; u32 key[8]; } buf; if (use_input_pool) { - num = extract_entropy(&buf, 32, 16); - if (num == 0) + if (!extract_entropy(&buf, 32, 16)) return; } else { _extract_crng(&primary_crng, buf.block); @@ -1329,74 +1156,48 @@ retry: } /* - * This function does the actual extraction for extract_entropy. - * - * Note: we assume that .poolwords is a multiple of 16 words. + * This is an HKDF-like construction for using the hashed collected entropy + * as a PRF key, that's then expanded block-by-block. */ -static void extract_buf(u8 *out) +static void _extract_entropy(void *buf, size_t nbytes) { - struct blake2s_state state __aligned(__alignof__(unsigned long)); - u8 hash[BLAKE2S_HASH_SIZE]; - unsigned long *salt; unsigned long flags; - - blake2s_init(&state, sizeof(hash)); - - /* - * If we have an architectural hardware random number - * generator, use it for BLAKE2's salt & personal fields. 
- */ - for (salt = (unsigned long *)&state.h[4]; - salt < (unsigned long *)&state.h[8]; ++salt) { - unsigned long v; - if (!arch_get_random_long(&v)) - break; - *salt ^= v; + u8 seed[BLAKE2S_HASH_SIZE], next_key[BLAKE2S_HASH_SIZE]; + struct { + unsigned long rdrand[32 / sizeof(long)]; + size_t counter; + } block; + size_t i; + + for (i = 0; i < ARRAY_SIZE(block.rdrand); ++i) { + if (!arch_get_random_long(&block.rdrand[i])) + block.rdrand[i] = random_get_entropy(); } - /* Generate a hash across the pool */ spin_lock_irqsave(&input_pool.lock, flags); - blake2s_update(&state, (const u8 *)input_pool_data, POOL_BYTES); - blake2s_final(&state, hash); /* final zeros out state */ - /* - * We mix the hash back into the pool to prevent backtracking - * attacks (where the attacker knows the state of the pool - * plus the current outputs, and attempts to find previous - * outputs), unless the hash function can be inverted. By - * mixing at least a hash worth of hash data back, we make - * brute-forcing the feedback as hard as brute-forcing the - * hash. - */ - __mix_pool_bytes(hash, sizeof(hash)); - spin_unlock_irqrestore(&input_pool.lock, flags); + /* seed = HASHPRF(last_key, entropy_input) */ + blake2s_final(&input_pool.hash, seed); - /* Note that EXTRACT_SIZE is half of hash size here, because above - * we've dumped the full length back into mixer. By reducing the - * amount that we emit, we retain a level of forward secrecy. 
- */ - memcpy(out, hash, EXTRACT_SIZE); - memzero_explicit(hash, sizeof(hash)); -} + /* next_key = HASHPRF(seed, RDRAND || 0) */ + block.counter = 0; + blake2s(next_key, (u8 *)&block, seed, sizeof(next_key), sizeof(block), sizeof(seed)); + blake2s_init_key(&input_pool.hash, BLAKE2S_HASH_SIZE, next_key, sizeof(next_key)); -static ssize_t _extract_entropy(void *buf, size_t nbytes) -{ - ssize_t ret = 0, i; - u8 tmp[EXTRACT_SIZE]; + spin_unlock_irqrestore(&input_pool.lock, flags); + memzero_explicit(next_key, sizeof(next_key)); while (nbytes) { - extract_buf(tmp); - i = min_t(int, nbytes, EXTRACT_SIZE); - memcpy(buf, tmp, i); + i = min_t(size_t, nbytes, BLAKE2S_HASH_SIZE); + /* output = HASHPRF(seed, RDRAND || ++counter) */ + ++block.counter; + blake2s(buf, (u8 *)&block, seed, i, sizeof(block), sizeof(seed)); nbytes -= i; buf += i; - ret += i; } - /* Wipe data just returned from memory */ - memzero_explicit(tmp, sizeof(tmp)); - - return ret; + memzero_explicit(seed, sizeof(seed)); + memzero_explicit(&block, sizeof(block)); } /* @@ -1404,13 +1205,18 @@ static ssize_t _extract_entropy(void *buf, size_t nbytes) * returns it in a buffer. * * The min parameter specifies the minimum amount we can pull before - * failing to avoid races that defeat catastrophic reseeding. + * failing to avoid races that defeat catastrophic reseeding. If we + * have less than min entropy available, we return false and buf is + * not filled. 
*/ -static ssize_t extract_entropy(void *buf, size_t nbytes, int min) +static bool extract_entropy(void *buf, size_t nbytes, int min) { trace_extract_entropy(nbytes, POOL_ENTROPY_BITS(), _RET_IP_); - nbytes = account(nbytes, min); - return _extract_entropy(buf, nbytes); + if (account(nbytes, min)) { + _extract_entropy(buf, nbytes); + return true; + } + return false; } #define warn_unseeded_randomness(previous) \ @@ -1674,7 +1480,7 @@ static void __init init_std_data(void) unsigned long rv; mix_pool_bytes(&now, sizeof(now)); - for (i = POOL_BYTES; i > 0; i -= sizeof(rv)) { + for (i = BLAKE2S_BLOCK_SIZE; i > 0; i -= sizeof(rv)) { if (!arch_get_random_seed_long(&rv) && !arch_get_random_long(&rv)) rv = random_get_entropy(); -- GitLab From 9c07f57869e90140080cfc282cc628d123e27704 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 2 Feb 2022 13:30:03 +0100 Subject: [PATCH 0523/1586] random: simplify entropy debiting Our pool is 256 bits, and we only ever use all of it or don't use it at all, which is decided by whether or not it has at least 128 bits in it. So we can drastically simplify the accounting and cmpxchg loop to do exactly this. While we're at it, we move the minimum bit size into a constant so it can be shared between the two places where it matters. The reason we want any of this is for the case in which an attacker has compromised the current state, and then bruteforces small amounts of entropy added to it. By demanding a particular minimum amount of entropy be present before reseeding, we make that bruteforcing difficult. Note that this rationale no longer includes anything about /dev/random blocking at the right moment, since /dev/random no longer blocks (except for at ~boot), but rather uses the crng. In a former life, /dev/random was different and therefore required a more nuanced account(), but this is no longer. Behaviorally, nothing changes here. This is just a simplification of the code. 
Cc: Theodore Ts'o Cc: Greg Kroah-Hartman Reviewed-by: Eric Biggers Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 91 ++++++++--------------------------- include/trace/events/random.h | 30 +++--------- 2 files changed, 27 insertions(+), 94 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 882f78829a24c..9014fd54da647 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -289,12 +289,14 @@ enum poolinfo { POOL_BITS = BLAKE2S_HASH_SIZE * 8, POOL_BITSHIFT = ilog2(POOL_BITS), + POOL_MIN_BITS = POOL_BITS / 2, /* To allow fractional bits to be tracked, the entropy_count field is * denominated in units of 1/8th bits. */ POOL_ENTROPY_SHIFT = 3, #define POOL_ENTROPY_BITS() (input_pool.entropy_count >> POOL_ENTROPY_SHIFT) - POOL_FRACBITS = POOL_BITS << POOL_ENTROPY_SHIFT + POOL_FRACBITS = POOL_BITS << POOL_ENTROPY_SHIFT, + POOL_MIN_FRACBITS = POOL_MIN_BITS << POOL_ENTROPY_SHIFT }; /* @@ -375,8 +377,7 @@ static struct { .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock), }; -static bool extract_entropy(void *buf, size_t nbytes, int min); -static void _extract_entropy(void *buf, size_t nbytes); +static void extract_entropy(void *buf, size_t nbytes); static void crng_reseed(struct crng_state *crng, bool use_input_pool); @@ -467,7 +468,7 @@ static void process_random_ready_list(void) */ static void credit_entropy_bits(int nbits) { - int entropy_count, entropy_bits, orig; + int entropy_count, orig; int nfrac = nbits << POOL_ENTROPY_SHIFT; /* Ensure that the multiplication can avoid being 64 bits wide. 
*/ @@ -527,8 +528,7 @@ retry: trace_credit_entropy_bits(nbits, entropy_count >> POOL_ENTROPY_SHIFT, _RET_IP_); - entropy_bits = entropy_count >> POOL_ENTROPY_SHIFT; - if (crng_init < 2 && entropy_bits >= 128) + if (crng_init < 2 && entropy_count >= POOL_MIN_FRACBITS) crng_reseed(&primary_crng, true); } @@ -618,7 +618,7 @@ static void crng_initialize_secondary(struct crng_state *crng) static void __init crng_initialize_primary(void) { - _extract_entropy(&primary_crng.state[4], sizeof(u32) * 12); + extract_entropy(&primary_crng.state[4], sizeof(u32) * 12); if (crng_init_try_arch_early() && trust_cpu && crng_init < 2) { invalidate_batched_entropy(); numa_crng_init(); @@ -788,8 +788,17 @@ static void crng_reseed(struct crng_state *crng, bool use_input_pool) } buf; if (use_input_pool) { - if (!extract_entropy(&buf, 32, 16)) - return; + int entropy_count; + do { + entropy_count = READ_ONCE(input_pool.entropy_count); + if (entropy_count < POOL_MIN_FRACBITS) + return; + } while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count); + extract_entropy(buf.key, sizeof(buf.key)); + if (random_write_wakeup_bits) { + wake_up_interruptible(&random_write_wait); + kill_fasync(&fasync, SIGIO, POLL_OUT); + } } else { _extract_crng(&primary_crng, buf.block); _crng_backtrack_protect(&primary_crng, buf.block, @@ -1114,52 +1123,11 @@ EXPORT_SYMBOL_GPL(add_disk_randomness); * *********************************************************************/ -/* - * This function decides how many bytes to actually take from the - * given pool, and also debits the entropy count accordingly. - */ -static size_t account(size_t nbytes, int min) -{ - int entropy_count, orig; - size_t ibytes, nfrac; - - BUG_ON(input_pool.entropy_count > POOL_FRACBITS); - - /* Can we pull enough? 
*/ -retry: - entropy_count = orig = READ_ONCE(input_pool.entropy_count); - if (WARN_ON(entropy_count < 0)) { - pr_warn("negative entropy count: count %d\n", entropy_count); - entropy_count = 0; - } - - /* never pull more than available */ - ibytes = min_t(size_t, nbytes, entropy_count >> (POOL_ENTROPY_SHIFT + 3)); - if (ibytes < min) - ibytes = 0; - nfrac = ibytes << (POOL_ENTROPY_SHIFT + 3); - if ((size_t)entropy_count > nfrac) - entropy_count -= nfrac; - else - entropy_count = 0; - - if (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig) - goto retry; - - trace_debit_entropy(8 * ibytes); - if (ibytes && POOL_ENTROPY_BITS() < random_write_wakeup_bits) { - wake_up_interruptible(&random_write_wait); - kill_fasync(&fasync, SIGIO, POLL_OUT); - } - - return ibytes; -} - /* * This is an HKDF-like construction for using the hashed collected entropy * as a PRF key, that's then expanded block-by-block. */ -static void _extract_entropy(void *buf, size_t nbytes) +static void extract_entropy(void *buf, size_t nbytes) { unsigned long flags; u8 seed[BLAKE2S_HASH_SIZE], next_key[BLAKE2S_HASH_SIZE]; @@ -1169,6 +1137,8 @@ static void _extract_entropy(void *buf, size_t nbytes) } block; size_t i; + trace_extract_entropy(nbytes, POOL_ENTROPY_BITS()); + for (i = 0; i < ARRAY_SIZE(block.rdrand); ++i) { if (!arch_get_random_long(&block.rdrand[i])) block.rdrand[i] = random_get_entropy(); @@ -1200,25 +1170,6 @@ static void _extract_entropy(void *buf, size_t nbytes) memzero_explicit(&block, sizeof(block)); } -/* - * This function extracts randomness from the "entropy pool", and - * returns it in a buffer. - * - * The min parameter specifies the minimum amount we can pull before - * failing to avoid races that defeat catastrophic reseeding. If we - * have less than min entropy available, we return false and buf is - * not filled. 
- */ -static bool extract_entropy(void *buf, size_t nbytes, int min) -{ - trace_extract_entropy(nbytes, POOL_ENTROPY_BITS(), _RET_IP_); - if (account(nbytes, min)) { - _extract_entropy(buf, nbytes); - return true; - } - return false; -} - #define warn_unseeded_randomness(previous) \ _warn_unseeded_randomness(__func__, (void *)_RET_IP_, (previous)) diff --git a/include/trace/events/random.h b/include/trace/events/random.h index a2d9aa16a5d7a..ad149aeaf42c5 100644 --- a/include/trace/events/random.h +++ b/include/trace/events/random.h @@ -79,22 +79,6 @@ TRACE_EVENT(credit_entropy_bits, __entry->bits, __entry->entropy_count, (void *)__entry->IP) ); -TRACE_EVENT(debit_entropy, - TP_PROTO(int debit_bits), - - TP_ARGS( debit_bits), - - TP_STRUCT__entry( - __field( int, debit_bits ) - ), - - TP_fast_assign( - __entry->debit_bits = debit_bits; - ), - - TP_printk("input pool: debit_bits %d", __entry->debit_bits) -); - TRACE_EVENT(add_input_randomness, TP_PROTO(int input_bits), @@ -161,31 +145,29 @@ DEFINE_EVENT(random__get_random_bytes, get_random_bytes_arch, ); DECLARE_EVENT_CLASS(random__extract_entropy, - TP_PROTO(int nbytes, int entropy_count, unsigned long IP), + TP_PROTO(int nbytes, int entropy_count), - TP_ARGS(nbytes, entropy_count, IP), + TP_ARGS(nbytes, entropy_count), TP_STRUCT__entry( __field( int, nbytes ) __field( int, entropy_count ) - __field(unsigned long, IP ) ), TP_fast_assign( __entry->nbytes = nbytes; __entry->entropy_count = entropy_count; - __entry->IP = IP; ), - TP_printk("input pool: nbytes %d entropy_count %d caller %pS", - __entry->nbytes, __entry->entropy_count, (void *)__entry->IP) + TP_printk("input pool: nbytes %d entropy_count %d", + __entry->nbytes, __entry->entropy_count) ); DEFINE_EVENT(random__extract_entropy, extract_entropy, - TP_PROTO(int nbytes, int entropy_count, unsigned long IP), + TP_PROTO(int nbytes, int entropy_count), - TP_ARGS(nbytes, entropy_count, IP) + TP_ARGS(nbytes, entropy_count) ); TRACE_EVENT(urandom_read, -- GitLab 
From c570449094844527577c5c914140222cb1893e3f Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 3 Feb 2022 13:28:06 +0100 Subject: [PATCH 0524/1586] random: use linear min-entropy accumulation crediting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 30e37ec516ae ("random: account for entropy loss due to overwrites") assumed that adding new entropy to the LFSR pool probabilistically cancelled out old entropy there, so entropy was credited asymptotically, approximating Shannon entropy of independent sources (rather than a stronger min-entropy notion) using 1/8th fractional bits and replacing a constant 2-2/√𝑒 term (~0.786938) with 3/4 (0.75) to slightly underestimate it. This wasn't superb, but it was perhaps better than nothing, so that's what was done. Which entropy specifically was being cancelled out and how much precisely each time is hard to tell, though as I showed with the attack code in my previous commit, a motivated adversary with sufficient information can actually cancel out everything. Since we're no longer using an LFSR for entropy accumulation, this probabilistic cancellation is no longer relevant. Rather, we're now using a computational hash function as the accumulator and we've switched to working in the random oracle model, from which we can now revisit the question of min-entropy accumulation, which is done in detail in <https://eprint.iacr.org/2019/198>. Consider a long input bit string that is built by concatenating various smaller independent input bit strings. Each one of these inputs has a designated min-entropy, which is what we're passing to credit_entropy_bits(h). When we pass the concatenation of these to a random oracle, it means that an adversary trying to receive back the same reply as us would need to become certain about each part of the concatenated bit string we passed in, which means becoming certain about all of those h values. 
That means we can estimate the accumulation by simply adding up the h values in calls to credit_entropy_bits(h); there's no probabilistic cancellation at play like there was said to be for the LFSR. Incidentally, this is also what other entropy accumulators based on computational hash functions do as well. So this commit replaces credit_entropy_bits(h) with essentially `total = min(POOL_BITS, total + h)`, done with a cmpxchg loop as before. What if we're wrong and the above is nonsense? It's not, but let's assume we don't want the actual _behavior_ of the code to change much. Currently that behavior is not extracting from the input pool until it has 128 bits of entropy in it. With the old algorithm, we'd hit that magic 128 number after roughly 256 calls to credit_entropy_bits(1). So, we can retain more or less the old behavior by waiting to extract from the input pool until it hits 256 bits of entropy using the new code. For people concerned about this change, it means that there's not that much practical behavioral change. And for folks actually trying to model the behavior rigorously, it means that we have an even higher margin against attacks. Cc: Theodore Ts'o Cc: Dominik Brodowski Cc: Greg Kroah-Hartman Reviewed-by: Eric Biggers Reviewed-by: Jean-Philippe Aumasson Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 114 ++++++++---------------------------------- 1 file changed, 20 insertions(+), 94 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 9014fd54da647..20538e9b1a2cf 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -286,17 +286,9 @@ /* #define ADD_INTERRUPT_BENCH */ -enum poolinfo { +enum { POOL_BITS = BLAKE2S_HASH_SIZE * 8, - POOL_BITSHIFT = ilog2(POOL_BITS), - POOL_MIN_BITS = POOL_BITS / 2, - - /* To allow fractional bits to be tracked, the entropy_count field is - * denominated in units of 1/8th bits. 
*/ - POOL_ENTROPY_SHIFT = 3, -#define POOL_ENTROPY_BITS() (input_pool.entropy_count >> POOL_ENTROPY_SHIFT) - POOL_FRACBITS = POOL_BITS << POOL_ENTROPY_SHIFT, - POOL_MIN_FRACBITS = POOL_MIN_BITS << POOL_ENTROPY_SHIFT + POOL_MIN_BITS = POOL_BITS /* No point in settling for less. */ }; /* @@ -309,7 +301,7 @@ static struct fasync_struct *fasync; * should wake up processes which are selecting or polling on write * access to /dev/random. */ -static int random_write_wakeup_bits = POOL_BITS * 3 / 4; +static int random_write_wakeup_bits = POOL_MIN_BITS; static DEFINE_SPINLOCK(random_ready_list_lock); static LIST_HEAD(random_ready_list); @@ -469,66 +461,18 @@ static void process_random_ready_list(void) static void credit_entropy_bits(int nbits) { int entropy_count, orig; - int nfrac = nbits << POOL_ENTROPY_SHIFT; - - /* Ensure that the multiplication can avoid being 64 bits wide. */ - BUILD_BUG_ON(2 * (POOL_ENTROPY_SHIFT + POOL_BITSHIFT) > 31); if (!nbits) return; -retry: - entropy_count = orig = READ_ONCE(input_pool.entropy_count); - if (nfrac < 0) { - /* Debit */ - entropy_count += nfrac; - } else { - /* - * Credit: we have to account for the possibility of - * overwriting already present entropy. Even in the - * ideal case of pure Shannon entropy, new contributions - * approach the full value asymptotically: - * - * entropy <- entropy + (pool_size - entropy) * - * (1 - exp(-add_entropy/pool_size)) - * - * For add_entropy <= pool_size/2 then - * (1 - exp(-add_entropy/pool_size)) >= - * (add_entropy/pool_size)*0.7869... - * so we can approximate the exponential with - * 3/4*add_entropy/pool_size and still be on the - * safe side by adding at most pool_size/2 at a time. - * - * The use of pool_size-2 in the while statement is to - * prevent rounding artifacts from making the loop - * arbitrarily long; this limits the loop to log2(pool_size)*2 - * turns no matter how large nbits is. 
- */ - int pnfrac = nfrac; - const int s = POOL_BITSHIFT + POOL_ENTROPY_SHIFT + 2; - /* The +2 corresponds to the /4 in the denominator */ - - do { - unsigned int anfrac = min(pnfrac, POOL_FRACBITS / 2); - unsigned int add = - ((POOL_FRACBITS - entropy_count) * anfrac * 3) >> s; - - entropy_count += add; - pnfrac -= anfrac; - } while (unlikely(entropy_count < POOL_FRACBITS - 2 && pnfrac)); - } - - if (WARN_ON(entropy_count < 0)) { - pr_warn("negative entropy/overflow: count %d\n", entropy_count); - entropy_count = 0; - } else if (entropy_count > POOL_FRACBITS) - entropy_count = POOL_FRACBITS; - if (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig) - goto retry; + do { + orig = READ_ONCE(input_pool.entropy_count); + entropy_count = min(POOL_BITS, orig + nbits); + } while (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig); - trace_credit_entropy_bits(nbits, entropy_count >> POOL_ENTROPY_SHIFT, _RET_IP_); + trace_credit_entropy_bits(nbits, entropy_count, _RET_IP_); - if (crng_init < 2 && entropy_count >= POOL_MIN_FRACBITS) + if (crng_init < 2 && entropy_count >= POOL_MIN_BITS) crng_reseed(&primary_crng, true); } @@ -791,7 +735,7 @@ static void crng_reseed(struct crng_state *crng, bool use_input_pool) int entropy_count; do { entropy_count = READ_ONCE(input_pool.entropy_count); - if (entropy_count < POOL_MIN_FRACBITS) + if (entropy_count < POOL_MIN_BITS) return; } while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count); extract_entropy(buf.key, sizeof(buf.key)); @@ -1014,7 +958,7 @@ void add_input_randomness(unsigned int type, unsigned int code, last_value = value; add_timer_randomness(&input_timer_state, (type << 4) ^ code ^ (code >> 4) ^ value); - trace_add_input_randomness(POOL_ENTROPY_BITS()); + trace_add_input_randomness(input_pool.entropy_count); } EXPORT_SYMBOL_GPL(add_input_randomness); @@ -1112,7 +1056,7 @@ void add_disk_randomness(struct gendisk *disk) return; /* first major is 1, so we get >= 0x200 here */ 
add_timer_randomness(disk->random, 0x100 + disk_devt(disk)); - trace_add_disk_randomness(disk_devt(disk), POOL_ENTROPY_BITS()); + trace_add_disk_randomness(disk_devt(disk), input_pool.entropy_count); } EXPORT_SYMBOL_GPL(add_disk_randomness); #endif @@ -1137,7 +1081,7 @@ static void extract_entropy(void *buf, size_t nbytes) } block; size_t i; - trace_extract_entropy(nbytes, POOL_ENTROPY_BITS()); + trace_extract_entropy(nbytes, input_pool.entropy_count); for (i = 0; i < ARRAY_SIZE(block.rdrand); ++i) { if (!arch_get_random_long(&block.rdrand[i])) @@ -1486,9 +1430,9 @@ static ssize_t urandom_read_nowarn(struct file *file, char __user *buf, { int ret; - nbytes = min_t(size_t, nbytes, INT_MAX >> (POOL_ENTROPY_SHIFT + 3)); + nbytes = min_t(size_t, nbytes, INT_MAX >> 6); ret = extract_crng_user(buf, nbytes); - trace_urandom_read(8 * nbytes, 0, POOL_ENTROPY_BITS()); + trace_urandom_read(8 * nbytes, 0, input_pool.entropy_count); return ret; } @@ -1527,7 +1471,7 @@ static __poll_t random_poll(struct file *file, poll_table *wait) mask = 0; if (crng_ready()) mask |= EPOLLIN | EPOLLRDNORM; - if (POOL_ENTROPY_BITS() < random_write_wakeup_bits) + if (input_pool.entropy_count < random_write_wakeup_bits) mask |= EPOLLOUT | EPOLLWRNORM; return mask; } @@ -1582,8 +1526,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) switch (cmd) { case RNDGETENTCNT: /* inherently racy, no point locking */ - ent_count = POOL_ENTROPY_BITS(); - if (put_user(ent_count, p)) + if (put_user(input_pool.entropy_count, p)) return -EFAULT; return 0; case RNDADDTOENTCNT: @@ -1734,23 +1677,6 @@ static int proc_do_uuid(struct ctl_table *table, int write, void *buffer, return proc_dostring(&fake_table, write, buffer, lenp, ppos); } -/* - * Return entropy available scaled to integral bits - */ -static int proc_do_entropy(struct ctl_table *table, int write, void *buffer, - size_t *lenp, loff_t *ppos) -{ - struct ctl_table fake_table; - int entropy_count; - - entropy_count = *(int 
*)table->data >> POOL_ENTROPY_SHIFT; - - fake_table.data = &entropy_count; - fake_table.maxlen = sizeof(entropy_count); - - return proc_dointvec(&fake_table, write, buffer, lenp, ppos); -} - static int sysctl_poolsize = POOL_BITS; static struct ctl_table random_table[] = { { @@ -1762,10 +1688,10 @@ static struct ctl_table random_table[] = { }, { .procname = "entropy_avail", + .data = &input_pool.entropy_count, .maxlen = sizeof(int), .mode = 0444, - .proc_handler = proc_do_entropy, - .data = &input_pool.entropy_count, + .proc_handler = proc_dointvec, }, { .procname = "write_wakeup_threshold", @@ -1972,7 +1898,7 @@ void add_hwgenerator_randomness(const char *buffer, size_t count, */ wait_event_interruptible_timeout(random_write_wait, !system_wq || kthread_should_stop() || - POOL_ENTROPY_BITS() <= random_write_wakeup_bits, + input_pool.entropy_count <= random_write_wakeup_bits, CRNG_RESEED_INTERVAL); mix_pool_bytes(buffer, count); credit_entropy_bits(entropy); -- GitLab From 489c7fc44b5740d377e8cfdbf0851036e493af00 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 5 Feb 2022 14:00:58 +0100 Subject: [PATCH 0525/1586] random: always wake up entropy writers after extraction Now that POOL_BITS == POOL_MIN_BITS, we must unconditionally wake up entropy writers after every extraction. Therefore there's no point of write_wakeup_threshold, so we can move it to the dustbin of unused compatibility sysctls. While we're at it, we can fix a small comparison where we were waking up after <= min rather than < min. Cc: Theodore Ts'o Suggested-by: Eric Biggers Reviewed-by: Eric Biggers Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. 
Donenfeld --- Documentation/admin-guide/sysctl/kernel.rst | 7 +++-- drivers/char/random.c | 33 +++++++-------------- 2 files changed, 16 insertions(+), 24 deletions(-) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index d359bcfadd39a..d3c6d9a501a9c 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -1029,14 +1029,17 @@ This is a directory, with the following entries: * ``poolsize``: the entropy pool size, in bits; * ``urandom_min_reseed_secs``: obsolete (used to determine the minimum - number of seconds between urandom pool reseeding). + number of seconds between urandom pool reseeding). This file is + writable for compatibility purposes, but writing to it has no effect + on any RNG behavior. * ``uuid``: a UUID generated every time this is retrieved (this can thus be used to generate UUIDs at will); * ``write_wakeup_threshold``: when the entropy count drops below this (as a number of bits), processes waiting to write to ``/dev/random`` - are woken up. + are woken up. This file is writable for compatibility purposes, but + writing to it has no effect on any RNG behavior. If ``drivers/char/random.c`` is built with ``ADD_INTERRUPT_BENCH`` defined, these additional entries are present: diff --git a/drivers/char/random.c b/drivers/char/random.c index 20538e9b1a2cf..3b30e764eeef4 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -296,12 +296,6 @@ enum { */ static DECLARE_WAIT_QUEUE_HEAD(random_write_wait); static struct fasync_struct *fasync; -/* - * If the entropy count falls under this number of bits, then we - * should wake up processes which are selecting or polling on write - * access to /dev/random. 
- */ -static int random_write_wakeup_bits = POOL_MIN_BITS; static DEFINE_SPINLOCK(random_ready_list_lock); static LIST_HEAD(random_ready_list); @@ -739,10 +733,8 @@ static void crng_reseed(struct crng_state *crng, bool use_input_pool) return; } while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count); extract_entropy(buf.key, sizeof(buf.key)); - if (random_write_wakeup_bits) { - wake_up_interruptible(&random_write_wait); - kill_fasync(&fasync, SIGIO, POLL_OUT); - } + wake_up_interruptible(&random_write_wait); + kill_fasync(&fasync, SIGIO, POLL_OUT); } else { _extract_crng(&primary_crng, buf.block); _crng_backtrack_protect(&primary_crng, buf.block, @@ -1471,7 +1463,7 @@ static __poll_t random_poll(struct file *file, poll_table *wait) mask = 0; if (crng_ready()) mask |= EPOLLIN | EPOLLRDNORM; - if (input_pool.entropy_count < random_write_wakeup_bits) + if (input_pool.entropy_count < POOL_MIN_BITS) mask |= EPOLLOUT | EPOLLWRNORM; return mask; } @@ -1556,7 +1548,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) */ if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (xchg(&input_pool.entropy_count, 0) && random_write_wakeup_bits) { + if (xchg(&input_pool.entropy_count, 0)) { wake_up_interruptible(&random_write_wait); kill_fasync(&fasync, SIGIO, POLL_OUT); } @@ -1636,9 +1628,9 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int, #include -static int min_write_thresh; -static int max_write_thresh = POOL_BITS; static int random_min_urandom_seed = 60; +static int random_write_wakeup_bits = POOL_MIN_BITS; +static int sysctl_poolsize = POOL_BITS; static char sysctl_bootid[16]; /* @@ -1677,7 +1669,6 @@ static int proc_do_uuid(struct ctl_table *table, int write, void *buffer, return proc_dostring(&fake_table, write, buffer, lenp, ppos); } -static int sysctl_poolsize = POOL_BITS; static struct ctl_table random_table[] = { { .procname = "poolsize", @@ -1698,9 +1689,7 @@ static struct ctl_table 
random_table[] = { .data = &random_write_wakeup_bits, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_write_thresh, - .extra2 = &max_write_thresh, + .proc_handler = proc_dointvec, }, { .procname = "urandom_min_reseed_secs", @@ -1892,13 +1881,13 @@ void add_hwgenerator_randomness(const char *buffer, size_t count, } /* Throttle writing if we're above the trickle threshold. - * We'll be woken up again once below random_write_wakeup_thresh, - * when the calling thread is about to terminate, or once - * CRNG_RESEED_INTERVAL has lapsed. + * We'll be woken up again once below POOL_MIN_BITS, when + * the calling thread is about to terminate, or once + * CRNG_RESEED_INTERVAL has elapsed. */ wait_event_interruptible_timeout(random_write_wait, !system_wq || kthread_should_stop() || - input_pool.entropy_count <= random_write_wakeup_bits, + input_pool.entropy_count < POOL_MIN_BITS, CRNG_RESEED_INTERVAL); mix_pool_bytes(buffer, count); credit_entropy_bits(entropy); -- GitLab From a49c010e61e1938be851f5e49ac219d49b704103 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 4 Feb 2022 01:45:53 +0100 Subject: [PATCH 0526/1586] random: make credit_entropy_bits() always safe This is called from various hwgenerator drivers, so rather than having one "safe" version for userspace and one "unsafe" version for the kernel, just make everything safe; the checks are cheap and sensible to have anyway. Reported-by: Sultan Alsawaf Reviewed-by: Eric Biggers Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. 
Donenfeld --- drivers/char/random.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 3b30e764eeef4..92aca0a539e0b 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -447,18 +447,15 @@ static void process_random_ready_list(void) spin_unlock_irqrestore(&random_ready_list_lock, flags); } -/* - * Credit (or debit) the entropy store with n bits of entropy. - * Use credit_entropy_bits_safe() if the value comes from userspace - * or otherwise should be checked for extreme values. - */ static void credit_entropy_bits(int nbits) { int entropy_count, orig; - if (!nbits) + if (nbits <= 0) return; + nbits = min(nbits, POOL_BITS); + do { orig = READ_ONCE(input_pool.entropy_count); entropy_count = min(POOL_BITS, orig + nbits); @@ -470,18 +467,6 @@ static void credit_entropy_bits(int nbits) crng_reseed(&primary_crng, true); } -static int credit_entropy_bits_safe(int nbits) -{ - if (nbits < 0) - return -EINVAL; - - /* Cap the value to avoid overflows */ - nbits = min(nbits, POOL_BITS); - - credit_entropy_bits(nbits); - return 0; -} - /********************************************************************* * * CRNG using CHACHA20 @@ -1526,7 +1511,10 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) return -EPERM; if (get_user(ent_count, p)) return -EFAULT; - return credit_entropy_bits_safe(ent_count); + if (ent_count < 0) + return -EINVAL; + credit_entropy_bits(ent_count); + return 0; case RNDADDENTROPY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -1539,7 +1527,8 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) retval = write_pool((const char __user *)p, size); if (retval < 0) return retval; - return credit_entropy_bits_safe(ent_count); + credit_entropy_bits(ent_count); + return 0; case RNDZAPENTCNT: case RNDCLEARPOOL: /* -- GitLab From 5d58ea3a31cc98b9fa563f6921d3d043bf0103d1 Mon Sep 17 00:00:00 2001 
From: Eric Biggers Date: Fri, 4 Feb 2022 14:17:33 -0800 Subject: [PATCH 0527/1586] random: remove use_input_pool parameter from crng_reseed() The primary_crng is always reseeded from the input_pool, while the NUMA crngs are always reseeded from the primary_crng. Remove the redundant 'use_input_pool' parameter from crng_reseed() and just directly check whether the crng is the primary_crng. Signed-off-by: Eric Biggers Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 92aca0a539e0b..968c415d1f45a 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -365,7 +365,7 @@ static struct { static void extract_entropy(void *buf, size_t nbytes); -static void crng_reseed(struct crng_state *crng, bool use_input_pool); +static void crng_reseed(struct crng_state *crng); /* * This function adds bytes into the entropy "pool". It does not @@ -464,7 +464,7 @@ static void credit_entropy_bits(int nbits) trace_credit_entropy_bits(nbits, entropy_count, _RET_IP_); if (crng_init < 2 && entropy_count >= POOL_MIN_BITS) - crng_reseed(&primary_crng, true); + crng_reseed(&primary_crng); } /********************************************************************* @@ -701,7 +701,7 @@ static int crng_slow_load(const u8 *cp, size_t len) return 1; } -static void crng_reseed(struct crng_state *crng, bool use_input_pool) +static void crng_reseed(struct crng_state *crng) { unsigned long flags; int i; @@ -710,7 +710,7 @@ static void crng_reseed(struct crng_state *crng, bool use_input_pool) u32 key[8]; } buf; - if (use_input_pool) { + if (crng == &primary_crng) { int entropy_count; do { entropy_count = READ_ONCE(input_pool.entropy_count); @@ -748,7 +748,7 @@ static void _extract_crng(struct crng_state *crng, u8 out[CHACHA_BLOCK_SIZE]) init_time = READ_ONCE(crng->init_time); if (time_after(READ_ONCE(crng_global_init_time), init_time) || time_after(jiffies, 
init_time + CRNG_RESEED_INTERVAL)) - crng_reseed(crng, crng == &primary_crng); + crng_reseed(crng); } spin_lock_irqsave(&crng->lock, flags); chacha20_block(&crng->state[0], out); @@ -1547,7 +1547,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) return -EPERM; if (crng_init < 2) return -ENODATA; - crng_reseed(&primary_crng, true); + crng_reseed(&primary_crng); WRITE_ONCE(crng_global_init_time, jiffies - 1); return 0; default: -- GitLab From 77760fd7f7ae3dfd03668204e708d1568d75447d Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 28 Jan 2022 23:29:45 +0100 Subject: [PATCH 0528/1586] random: remove batched entropy locking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rather than use spinlocks to protect batched entropy, we can instead disable interrupts locally, since we're dealing with per-cpu data, and manage resets with a basic generation counter. At the same time, we can't quite do this on PREEMPT_RT, where we still want spinlocks-as- mutexes semantics. So we use a local_lock_t, which provides the right behavior for each. Because this is a per-cpu lock, that generation counter is still doing the necessary CPU-to-CPU communication. This should improve performance a bit. It will also fix the linked splat that Jonathan received with a PROVE_RAW_LOCK_NESTING=y. Reviewed-by: Sebastian Andrzej Siewior Reviewed-by: Dominik Brodowski Reviewed-by: Eric Biggers Suggested-by: Andy Lutomirski Reported-by: Jonathan Neuschäfer Tested-by: Jonathan Neuschäfer Link: https://lore.kernel.org/lkml/YfMa0QgsjCVdRAvJ@latitude/ Signed-off-by: Jason A. 
Donenfeld --- drivers/char/random.c | 55 ++++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 968c415d1f45a..ceded1c4f73b4 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1731,13 +1731,16 @@ static int __init random_sysctls_init(void) device_initcall(random_sysctls_init); #endif /* CONFIG_SYSCTL */ +static atomic_t batch_generation = ATOMIC_INIT(0); + struct batched_entropy { union { u64 entropy_u64[CHACHA_BLOCK_SIZE / sizeof(u64)]; u32 entropy_u32[CHACHA_BLOCK_SIZE / sizeof(u32)]; }; + local_lock_t lock; unsigned int position; - spinlock_t batch_lock; + int generation; }; /* @@ -1749,7 +1752,7 @@ struct batched_entropy { * point prior. */ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = { - .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u64.lock), + .lock = INIT_LOCAL_LOCK(batched_entropy_u64.lock) }; u64 get_random_u64(void) @@ -1758,67 +1761,65 @@ u64 get_random_u64(void) unsigned long flags; struct batched_entropy *batch; static void *previous; + int next_gen; warn_unseeded_randomness(&previous); + local_lock_irqsave(&batched_entropy_u64.lock, flags); batch = raw_cpu_ptr(&batched_entropy_u64); - spin_lock_irqsave(&batch->batch_lock, flags); - if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) { + + next_gen = atomic_read(&batch_generation); + if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0 || + next_gen != batch->generation) { extract_crng((u8 *)batch->entropy_u64); batch->position = 0; + batch->generation = next_gen; } + ret = batch->entropy_u64[batch->position++]; - spin_unlock_irqrestore(&batch->batch_lock, flags); + local_unlock_irqrestore(&batched_entropy_u64.lock, flags); return ret; } EXPORT_SYMBOL(get_random_u64); static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32) = { - .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u32.lock), + .lock = 
INIT_LOCAL_LOCK(batched_entropy_u32.lock) }; + u32 get_random_u32(void) { u32 ret; unsigned long flags; struct batched_entropy *batch; static void *previous; + int next_gen; warn_unseeded_randomness(&previous); + local_lock_irqsave(&batched_entropy_u32.lock, flags); batch = raw_cpu_ptr(&batched_entropy_u32); - spin_lock_irqsave(&batch->batch_lock, flags); - if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) { + + next_gen = atomic_read(&batch_generation); + if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0 || + next_gen != batch->generation) { extract_crng((u8 *)batch->entropy_u32); batch->position = 0; + batch->generation = next_gen; } + ret = batch->entropy_u32[batch->position++]; - spin_unlock_irqrestore(&batch->batch_lock, flags); + local_unlock_irqrestore(&batched_entropy_u32.lock, flags); return ret; } EXPORT_SYMBOL(get_random_u32); /* It's important to invalidate all potential batched entropy that might * be stored before the crng is initialized, which we can do lazily by - * simply resetting the counter to zero so that it's re-extracted on the - * next usage. */ + * bumping the generation counter. 
+ */ static void invalidate_batched_entropy(void) { - int cpu; - unsigned long flags; - - for_each_possible_cpu(cpu) { - struct batched_entropy *batched_entropy; - - batched_entropy = per_cpu_ptr(&batched_entropy_u32, cpu); - spin_lock_irqsave(&batched_entropy->batch_lock, flags); - batched_entropy->position = 0; - spin_unlock(&batched_entropy->batch_lock); - - batched_entropy = per_cpu_ptr(&batched_entropy_u64, cpu); - spin_lock(&batched_entropy->batch_lock); - batched_entropy->position = 0; - spin_unlock_irqrestore(&batched_entropy->batch_lock, flags); - } + atomic_inc(&batch_generation); } /** -- GitLab From 7c2fe2b32bf76441ff5b7a425b384e5f75aa530a Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sat, 5 Feb 2022 11:34:57 +0100 Subject: [PATCH 0529/1586] random: fix locking in crng_fast_load() crng_init is protected by primary_crng->lock, so keep holding that lock when incrementing crng_init from 0 to 1 in crng_fast_load(). The call to pr_notice() can wait until the lock is released; this code path cannot be reached twice, as crng_fast_load() aborts early if crng_init > 0. Signed-off-by: Dominik Brodowski Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index ceded1c4f73b4..79cf577e5086b 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -647,12 +647,13 @@ static size_t crng_fast_load(const u8 *cp, size_t len) p[crng_init_cnt % CHACHA_KEY_SIZE] ^= *cp; cp++; crng_init_cnt++; len--; ret++; } - spin_unlock_irqrestore(&primary_crng.lock, flags); if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) { invalidate_batched_entropy(); crng_init = 1; - pr_notice("fast init done\n"); } + spin_unlock_irqrestore(&primary_crng.lock, flags); + if (crng_init == 1) + pr_notice("fast init done\n"); return ret; } -- GitLab From 28f425e573e906a4c15f8392cc2b1561ef448595 Mon Sep 17 00:00:00 2001 From: "Jason A. 
Donenfeld" Date: Tue, 8 Feb 2022 12:18:33 +0100 Subject: [PATCH 0530/1586] random: use RDSEED instead of RDRAND in entropy extraction When /dev/random was directly connected with entropy extraction, without any expansion stage, extract_buf() was called for every 10 bytes of data read from /dev/random. For that reason, RDRAND was used rather than RDSEED. At the same time, crng_reseed() was still only called every 5 minutes, so there RDSEED made sense. Those olden days were also a time when the entropy collector did not use a cryptographic hash function, which meant most bets were off in terms of real preimage resistance. For that reason too it didn't matter _that_ much whether RDSEED was mixed in before or after entropy extraction; both choices were sort of bad. But now we have a cryptographic hash function at work, and with that we get real preimage resistance. We also now only call extract_entropy() every 5 minutes, rather than every 10 bytes. This allows us to do two important things. First, we can switch to using RDSEED in extract_entropy(), as Dominik suggested. Second, we can ensure that RDSEED input always goes into the cryptographic hash function with other things before being used directly. This eliminates a category of attacks in which the CPU knows the current state of the crng and knows that we're going to xor RDSEED into it, and so it computes a malicious RDSEED. By going through our hash function, it would require the CPU to compute a preimage on the fly, which isn't going to happen. Cc: Theodore Ts'o Reviewed-by: Eric Biggers Reviewed-by: Dominik Brodowski Suggested-by: Dominik Brodowski Signed-off-by: Jason A. 
Donenfeld --- drivers/char/random.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 79cf577e5086b..41f42d8875138 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -727,13 +727,8 @@ static void crng_reseed(struct crng_state *crng) CHACHA_KEY_SIZE); } spin_lock_irqsave(&crng->lock, flags); - for (i = 0; i < 8; i++) { - unsigned long rv; - if (!arch_get_random_seed_long(&rv) && - !arch_get_random_long(&rv)) - rv = random_get_entropy(); - crng->state[i + 4] ^= buf.key[i] ^ rv; - } + for (i = 0; i < 8; i++) + crng->state[i + 4] ^= buf.key[i]; memzero_explicit(&buf, sizeof(buf)); WRITE_ONCE(crng->init_time, jiffies); spin_unlock_irqrestore(&crng->lock, flags); @@ -1054,16 +1049,17 @@ static void extract_entropy(void *buf, size_t nbytes) unsigned long flags; u8 seed[BLAKE2S_HASH_SIZE], next_key[BLAKE2S_HASH_SIZE]; struct { - unsigned long rdrand[32 / sizeof(long)]; + unsigned long rdseed[32 / sizeof(long)]; size_t counter; } block; size_t i; trace_extract_entropy(nbytes, input_pool.entropy_count); - for (i = 0; i < ARRAY_SIZE(block.rdrand); ++i) { - if (!arch_get_random_long(&block.rdrand[i])) - block.rdrand[i] = random_get_entropy(); + for (i = 0; i < ARRAY_SIZE(block.rdseed); ++i) { + if (!arch_get_random_seed_long(&block.rdseed[i]) && + !arch_get_random_long(&block.rdseed[i])) + block.rdseed[i] = random_get_entropy(); } spin_lock_irqsave(&input_pool.lock, flags); @@ -1071,7 +1067,7 @@ static void extract_entropy(void *buf, size_t nbytes) /* seed = HASHPRF(last_key, entropy_input) */ blake2s_final(&input_pool.hash, seed); - /* next_key = HASHPRF(seed, RDRAND || 0) */ + /* next_key = HASHPRF(seed, RDSEED || 0) */ block.counter = 0; blake2s(next_key, (u8 *)&block, seed, sizeof(next_key), sizeof(block), sizeof(seed)); blake2s_init_key(&input_pool.hash, BLAKE2S_HASH_SIZE, next_key, sizeof(next_key)); @@ -1081,7 +1077,7 @@ static void extract_entropy(void *buf, 
size_t nbytes) while (nbytes) { i = min_t(size_t, nbytes, BLAKE2S_HASH_SIZE); - /* output = HASHPRF(seed, RDRAND || ++counter) */ + /* output = HASHPRF(seed, RDSEED || ++counter) */ ++block.counter; blake2s(buf, (u8 *)&block, seed, i, sizeof(block), sizeof(seed)); nbytes -= i; -- GitLab From a9412d510ab9a9ba411fea612903631d2e1f1601 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 6 Feb 2022 23:51:41 +0100 Subject: [PATCH 0531/1586] random: get rid of secondary crngs As the comment said, this is indeed a "hack". Since it was introduced, it's been a constant state machine nightmare, with lots of subtle early boot issues and a wildly complex set of machinery to keep everything in sync. Rather than continuing to play whack-a-mole with this approach, this commit simply removes it entirely. This commit is preparation for "random: use simpler fast key erasure flow on per-cpu keys" in this series, which introduces a simpler (and faster) mechanism to accomplish the same thing. Cc: Theodore Ts'o Reviewed-by: Eric Biggers Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 225 ++++++++++-------------------------------- 1 file changed, 53 insertions(+), 172 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 41f42d8875138..be4922e09cc57 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -323,14 +323,11 @@ static struct crng_state primary_crng = { * its value (from 0->1->2). 
*/ static int crng_init = 0; -static bool crng_need_final_init = false; #define crng_ready() (likely(crng_init > 1)) static int crng_init_cnt = 0; -static unsigned long crng_global_init_time = 0; #define CRNG_INIT_CNT_THRESH (2 * CHACHA_KEY_SIZE) -static void _extract_crng(struct crng_state *crng, u8 out[CHACHA_BLOCK_SIZE]); -static void _crng_backtrack_protect(struct crng_state *crng, - u8 tmp[CHACHA_BLOCK_SIZE], int used); +static void extract_crng(u8 out[CHACHA_BLOCK_SIZE]); +static void crng_backtrack_protect(u8 tmp[CHACHA_BLOCK_SIZE], int used); static void process_random_ready_list(void); static void _get_random_bytes(void *buf, int nbytes); @@ -365,7 +362,7 @@ static struct { static void extract_entropy(void *buf, size_t nbytes); -static void crng_reseed(struct crng_state *crng); +static void crng_reseed(void); /* * This function adds bytes into the entropy "pool". It does not @@ -464,7 +461,7 @@ static void credit_entropy_bits(int nbits) trace_credit_entropy_bits(nbits, entropy_count, _RET_IP_); if (crng_init < 2 && entropy_count >= POOL_MIN_BITS) - crng_reseed(&primary_crng); + crng_reseed(); } /********************************************************************* @@ -477,16 +474,7 @@ static void credit_entropy_bits(int nbits) static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait); -/* - * Hack to deal with crazy userspace progams when they are all trying - * to access /dev/urandom in parallel. The programs are almost - * certainly doing something terribly wrong, but we'll work around - * their brain damage. 
- */ -static struct crng_state **crng_node_pool __read_mostly; - static void invalidate_batched_entropy(void); -static void numa_crng_init(void); static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU); static int __init parse_trust_cpu(char *arg) @@ -495,24 +483,6 @@ static int __init parse_trust_cpu(char *arg) } early_param("random.trust_cpu", parse_trust_cpu); -static bool crng_init_try_arch(struct crng_state *crng) -{ - int i; - bool arch_init = true; - unsigned long rv; - - for (i = 4; i < 16; i++) { - if (!arch_get_random_seed_long(&rv) && - !arch_get_random_long(&rv)) { - rv = random_get_entropy(); - arch_init = false; - } - crng->state[i] ^= rv; - } - - return arch_init; -} - static bool __init crng_init_try_arch_early(void) { int i; @@ -531,100 +501,17 @@ static bool __init crng_init_try_arch_early(void) return arch_init; } -static void crng_initialize_secondary(struct crng_state *crng) -{ - chacha_init_consts(crng->state); - _get_random_bytes(&crng->state[4], sizeof(u32) * 12); - crng_init_try_arch(crng); - crng->init_time = jiffies - CRNG_RESEED_INTERVAL - 1; -} - -static void __init crng_initialize_primary(void) +static void __init crng_initialize(void) { extract_entropy(&primary_crng.state[4], sizeof(u32) * 12); if (crng_init_try_arch_early() && trust_cpu && crng_init < 2) { invalidate_batched_entropy(); - numa_crng_init(); crng_init = 2; pr_notice("crng init done (trusting CPU's manufacturer)\n"); } primary_crng.init_time = jiffies - CRNG_RESEED_INTERVAL - 1; } -static void crng_finalize_init(void) -{ - if (!system_wq) { - /* We can't call numa_crng_init until we have workqueues, - * so mark this for processing later. 
*/ - crng_need_final_init = true; - return; - } - - invalidate_batched_entropy(); - numa_crng_init(); - crng_init = 2; - crng_need_final_init = false; - process_random_ready_list(); - wake_up_interruptible(&crng_init_wait); - kill_fasync(&fasync, SIGIO, POLL_IN); - pr_notice("crng init done\n"); - if (unseeded_warning.missed) { - pr_notice("%d get_random_xx warning(s) missed due to ratelimiting\n", - unseeded_warning.missed); - unseeded_warning.missed = 0; - } - if (urandom_warning.missed) { - pr_notice("%d urandom warning(s) missed due to ratelimiting\n", - urandom_warning.missed); - urandom_warning.missed = 0; - } -} - -static void do_numa_crng_init(struct work_struct *work) -{ - int i; - struct crng_state *crng; - struct crng_state **pool; - - pool = kcalloc(nr_node_ids, sizeof(*pool), GFP_KERNEL | __GFP_NOFAIL); - for_each_online_node(i) { - crng = kmalloc_node(sizeof(struct crng_state), - GFP_KERNEL | __GFP_NOFAIL, i); - spin_lock_init(&crng->lock); - crng_initialize_secondary(crng); - pool[i] = crng; - } - /* pairs with READ_ONCE() in select_crng() */ - if (cmpxchg_release(&crng_node_pool, NULL, pool) != NULL) { - for_each_node(i) - kfree(pool[i]); - kfree(pool); - } -} - -static DECLARE_WORK(numa_crng_init_work, do_numa_crng_init); - -static void numa_crng_init(void) -{ - if (IS_ENABLED(CONFIG_NUMA)) - schedule_work(&numa_crng_init_work); -} - -static struct crng_state *select_crng(void) -{ - if (IS_ENABLED(CONFIG_NUMA)) { - struct crng_state **pool; - int nid = numa_node_id(); - - /* pairs with cmpxchg_release() in do_numa_crng_init() */ - pool = READ_ONCE(crng_node_pool); - if (pool && pool[nid]) - return pool[nid]; - } - - return &primary_crng; -} - /* * crng_fast_load() can be called by code in the interrupt service * path. So we can't afford to dilly-dally. 
Returns the number of @@ -702,68 +589,71 @@ static int crng_slow_load(const u8 *cp, size_t len) return 1; } -static void crng_reseed(struct crng_state *crng) +static void crng_reseed(void) { unsigned long flags; - int i; + int i, entropy_count; union { u8 block[CHACHA_BLOCK_SIZE]; u32 key[8]; } buf; - if (crng == &primary_crng) { - int entropy_count; - do { - entropy_count = READ_ONCE(input_pool.entropy_count); - if (entropy_count < POOL_MIN_BITS) - return; - } while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count); - extract_entropy(buf.key, sizeof(buf.key)); - wake_up_interruptible(&random_write_wait); - kill_fasync(&fasync, SIGIO, POLL_OUT); - } else { - _extract_crng(&primary_crng, buf.block); - _crng_backtrack_protect(&primary_crng, buf.block, - CHACHA_KEY_SIZE); - } - spin_lock_irqsave(&crng->lock, flags); + do { + entropy_count = READ_ONCE(input_pool.entropy_count); + if (entropy_count < POOL_MIN_BITS) + return; + } while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count); + extract_entropy(buf.key, sizeof(buf.key)); + wake_up_interruptible(&random_write_wait); + kill_fasync(&fasync, SIGIO, POLL_OUT); + + spin_lock_irqsave(&primary_crng.lock, flags); for (i = 0; i < 8; i++) - crng->state[i + 4] ^= buf.key[i]; + primary_crng.state[i + 4] ^= buf.key[i]; memzero_explicit(&buf, sizeof(buf)); - WRITE_ONCE(crng->init_time, jiffies); - spin_unlock_irqrestore(&crng->lock, flags); - if (crng == &primary_crng && crng_init < 2) - crng_finalize_init(); + WRITE_ONCE(primary_crng.init_time, jiffies); + spin_unlock_irqrestore(&primary_crng.lock, flags); + if (crng_init < 2) { + invalidate_batched_entropy(); + crng_init = 2; + process_random_ready_list(); + wake_up_interruptible(&crng_init_wait); + kill_fasync(&fasync, SIGIO, POLL_IN); + pr_notice("crng init done\n"); + if (unseeded_warning.missed) { + pr_notice("%d get_random_xx warning(s) missed due to ratelimiting\n", + unseeded_warning.missed); + unseeded_warning.missed = 0; + 
} + if (urandom_warning.missed) { + pr_notice("%d urandom warning(s) missed due to ratelimiting\n", + urandom_warning.missed); + urandom_warning.missed = 0; + } + } } -static void _extract_crng(struct crng_state *crng, u8 out[CHACHA_BLOCK_SIZE]) +static void extract_crng(u8 out[CHACHA_BLOCK_SIZE]) { unsigned long flags, init_time; if (crng_ready()) { - init_time = READ_ONCE(crng->init_time); - if (time_after(READ_ONCE(crng_global_init_time), init_time) || - time_after(jiffies, init_time + CRNG_RESEED_INTERVAL)) - crng_reseed(crng); + init_time = READ_ONCE(primary_crng.init_time); + if (time_after(jiffies, init_time + CRNG_RESEED_INTERVAL)) + crng_reseed(); } - spin_lock_irqsave(&crng->lock, flags); - chacha20_block(&crng->state[0], out); - if (crng->state[12] == 0) - crng->state[13]++; - spin_unlock_irqrestore(&crng->lock, flags); -} - -static void extract_crng(u8 out[CHACHA_BLOCK_SIZE]) -{ - _extract_crng(select_crng(), out); + spin_lock_irqsave(&primary_crng.lock, flags); + chacha20_block(&primary_crng.state[0], out); + if (primary_crng.state[12] == 0) + primary_crng.state[13]++; + spin_unlock_irqrestore(&primary_crng.lock, flags); } /* * Use the leftover bytes from the CRNG block output (if there is * enough) to mutate the CRNG key to provide backtracking protection. 
*/ -static void _crng_backtrack_protect(struct crng_state *crng, - u8 tmp[CHACHA_BLOCK_SIZE], int used) +static void crng_backtrack_protect(u8 tmp[CHACHA_BLOCK_SIZE], int used) { unsigned long flags; u32 *s, *d; @@ -774,17 +664,12 @@ static void _crng_backtrack_protect(struct crng_state *crng, extract_crng(tmp); used = 0; } - spin_lock_irqsave(&crng->lock, flags); + spin_lock_irqsave(&primary_crng.lock, flags); s = (u32 *)&tmp[used]; - d = &crng->state[4]; + d = &primary_crng.state[4]; for (i = 0; i < 8; i++) *d++ ^= *s++; - spin_unlock_irqrestore(&crng->lock, flags); -} - -static void crng_backtrack_protect(u8 tmp[CHACHA_BLOCK_SIZE], int used) -{ - _crng_backtrack_protect(select_crng(), tmp, used); + spin_unlock_irqrestore(&primary_crng.lock, flags); } static ssize_t extract_crng_user(void __user *buf, size_t nbytes) @@ -1371,10 +1256,7 @@ static void __init init_std_data(void) int __init rand_initialize(void) { init_std_data(); - if (crng_need_final_init) - crng_finalize_init(); - crng_initialize_primary(); - crng_global_init_time = jiffies; + crng_initialize(); if (ratelimit_disable) { urandom_warning.interval = 0; unseeded_warning.interval = 0; @@ -1544,8 +1426,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) return -EPERM; if (crng_init < 2) return -ENODATA; - crng_reseed(&primary_crng); - WRITE_ONCE(crng_global_init_time, jiffies - 1); + crng_reseed(); return 0; default: return -EINVAL; -- GitLab From 8566417221fcec51346ec164e920dacb979c6b5f Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 8 Feb 2022 12:40:14 +0100 Subject: [PATCH 0532/1586] random: inline leaves of rand_initialize() This is a preparatory commit for the following one. We simply inline the various functions that rand_initialize() calls that have no other callers. The compiler was doing this anyway before. Doing this will allow us to reorganize this after. 
We can then move the trust_cpu and parse_trust_cpu definitions a bit closer to where they're actually used, which makes the code easier to read. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 90 ++++++++++++++++--------------------------- 1 file changed, 33 insertions(+), 57 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index be4922e09cc57..a55a58b9807ae 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -476,42 +476,6 @@ static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait); static void invalidate_batched_entropy(void); -static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU); -static int __init parse_trust_cpu(char *arg) -{ - return kstrtobool(arg, &trust_cpu); -} -early_param("random.trust_cpu", parse_trust_cpu); - -static bool __init crng_init_try_arch_early(void) -{ - int i; - bool arch_init = true; - unsigned long rv; - - for (i = 4; i < 16; i++) { - if (!arch_get_random_seed_long_early(&rv) && - !arch_get_random_long_early(&rv)) { - rv = random_get_entropy(); - arch_init = false; - } - primary_crng.state[i] ^= rv; - } - - return arch_init; -} - -static void __init crng_initialize(void) -{ - extract_entropy(&primary_crng.state[4], sizeof(u32) * 12); - if (crng_init_try_arch_early() && trust_cpu && crng_init < 2) { - invalidate_batched_entropy(); - crng_init = 2; - pr_notice("crng init done (trusting CPU's manufacturer)\n"); - } - primary_crng.init_time = jiffies - CRNG_RESEED_INTERVAL - 1; -} - /* * crng_fast_load() can be called by code in the interrupt service * path. So we can't afford to dilly-dally. 
Returns the number of @@ -1220,17 +1184,28 @@ int __must_check get_random_bytes_arch(void *buf, int nbytes) } EXPORT_SYMBOL(get_random_bytes_arch); +static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU); +static int __init parse_trust_cpu(char *arg) +{ + return kstrtobool(arg, &trust_cpu); +} +early_param("random.trust_cpu", parse_trust_cpu); + /* - * init_std_data - initialize pool with system data - * - * This function clears the pool's entropy count and mixes some system - * data into the pool to prepare it for use. The pool is not cleared - * as that can only decrease the entropy in the pool. + * Note that setup_arch() may call add_device_randomness() + * long before we get here. This allows seeding of the pools + * with some platform dependent data very early in the boot + * process. But it limits our options here. We must use + * statically allocated structures that already have all + * initializations complete at compile time. We should also + * take care not to overwrite the precious per platform data + * we were given. */ -static void __init init_std_data(void) +int __init rand_initialize(void) { int i; ktime_t now = ktime_get_real(); + bool arch_init = true; unsigned long rv; mix_pool_bytes(&now, sizeof(now)); @@ -1241,22 +1216,23 @@ static void __init init_std_data(void) mix_pool_bytes(&rv, sizeof(rv)); } mix_pool_bytes(utsname(), sizeof(*(utsname()))); -} -/* - * Note that setup_arch() may call add_device_randomness() - * long before we get here. This allows seeding of the pools - * with some platform dependent data very early in the boot - * process. But it limits our options here. We must use - * statically allocated structures that already have all - * initializations complete at compile time. We should also - * take care not to overwrite the precious per platform data - * we were given. 
- */ -int __init rand_initialize(void) -{ - init_std_data(); - crng_initialize(); + extract_entropy(&primary_crng.state[4], sizeof(u32) * 12); + for (i = 4; i < 16; i++) { + if (!arch_get_random_seed_long_early(&rv) && + !arch_get_random_long_early(&rv)) { + rv = random_get_entropy(); + arch_init = false; + } + primary_crng.state[i] ^= rv; + } + if (arch_init && trust_cpu && crng_init < 2) { + invalidate_batched_entropy(); + crng_init = 2; + pr_notice("crng init done (trusting CPU's manufacturer)\n"); + } + primary_crng.init_time = jiffies - CRNG_RESEED_INTERVAL - 1; + if (ratelimit_disable) { urandom_warning.interval = 0; unseeded_warning.interval = 0; -- GitLab From a02cf3d0dd77244fd5333ac48d78871de459ae6d Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 8 Feb 2022 12:44:28 +0100 Subject: [PATCH 0533/1586] random: ensure early RDSEED goes through mixer on init Continuing the reasoning of "random: use RDSEED instead of RDRAND in entropy extraction" from this series, at init time we also don't want to be xoring RDSEED directly into the crng. Instead it's safer to put it into our entropy collector and then re-extract it, so that it goes through a hash function with preimage resistance. As a matter of hygiene, we also order these now so that the RDSEED bytes are hashed in first, followed by the bytes that are likely more predictable (e.g. utsname()). Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Reviewed-by: Eric Biggers Signed-off-by: Jason A.
Donenfeld --- drivers/char/random.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index a55a58b9807ae..21a067cf5b4c4 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1208,24 +1208,18 @@ int __init rand_initialize(void) bool arch_init = true; unsigned long rv; - mix_pool_bytes(&now, sizeof(now)); for (i = BLAKE2S_BLOCK_SIZE; i > 0; i -= sizeof(rv)) { - if (!arch_get_random_seed_long(&rv) && - !arch_get_random_long(&rv)) - rv = random_get_entropy(); - mix_pool_bytes(&rv, sizeof(rv)); - } - mix_pool_bytes(utsname(), sizeof(*(utsname()))); - - extract_entropy(&primary_crng.state[4], sizeof(u32) * 12); - for (i = 4; i < 16; i++) { if (!arch_get_random_seed_long_early(&rv) && !arch_get_random_long_early(&rv)) { rv = random_get_entropy(); arch_init = false; } - primary_crng.state[i] ^= rv; + mix_pool_bytes(&rv, sizeof(rv)); } + mix_pool_bytes(&now, sizeof(now)); + mix_pool_bytes(utsname(), sizeof(*(utsname()))); + + extract_entropy(&primary_crng.state[4], sizeof(u32) * 12); if (arch_init && trust_cpu && crng_init < 2) { invalidate_batched_entropy(); crng_init = 2; -- GitLab From 91c2afca290ed3034841c8c8532e69ed9e16cf34 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 8 Feb 2022 13:00:11 +0100 Subject: [PATCH 0534/1586] random: do not xor RDRAND when writing into /dev/random Continuing the reasoning of "random: ensure early RDSEED goes through mixer on init", we don't want RDRAND interacting with anything without going through the mixer function, as a backdoored CPU could presumably cancel out data during an xor, which it'd have a harder time doing when being forced through a cryptographic hash function. There's actually no need at all to be calling RDRAND in write_pool(), because before we extract from the pool, we always do so with 32 bytes of RDSEED hashed in at that stage. Xoring at this stage is needless and introduces a minor liability. 
Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 21a067cf5b4c4..d31b0b3afe2ed 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1305,25 +1305,15 @@ static __poll_t random_poll(struct file *file, poll_table *wait) static int write_pool(const char __user *buffer, size_t count) { size_t bytes; - u32 t, buf[16]; + u8 buf[BLAKE2S_BLOCK_SIZE]; const char __user *p = buffer; while (count > 0) { - int b, i = 0; - bytes = min(count, sizeof(buf)); - if (copy_from_user(&buf, p, bytes)) + if (copy_from_user(buf, p, bytes)) return -EFAULT; - - for (b = bytes; b > 0; b -= sizeof(u32), i++) { - if (!arch_get_random_int(&t)) - break; - buf[i] ^= t; - } - count -= bytes; p += bytes; - mix_pool_bytes(buf, bytes); cond_resched(); } -- GitLab From 80e4e1f472889f31a4dcaea3a4eb7a565296f1f3 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Sun, 20 Feb 2022 19:19:10 +1300 Subject: [PATCH 0535/1586] irqchip/gic-v3: Use dsb(ishst) to order writes with ICC_SGI1R_EL1 accesses A dsb(ishst) barrier should be enough to order previous writes with the system register generating the SGI, as we only need to guarantee the visibility of data to other CPUs in the inner shareable domain before we send the SGI. A micro-benchmark is written to verify the performance impact on kunpeng920 machine with 2 sockets, each socket has 2 dies, and each die has 24 CPUs, so totally the system has 2 * 2 * 24 = 96 CPUs. ~2% performance improvement can be seen by this benchmark. 
The code of benchmark module: #include #include volatile int data0 ____cacheline_aligned; volatile int data1 ____cacheline_aligned; volatile int data2 ____cacheline_aligned; volatile int data3 ____cacheline_aligned; volatile int data4 ____cacheline_aligned; volatile int data5 ____cacheline_aligned; volatile int data6 ____cacheline_aligned; static void ipi_latency_func(void *val) { } static int __init ipi_latency_init(void) { ktime_t stime, etime, delta; int cpu, i; int start = smp_processor_id(); stime = ktime_get(); for ( i = 0; i < 1000; i++) for (cpu = 0; cpu < 96; cpu++) { data0 = data1 = data2 = data3 = data4 = data5 = data6 = cpu; smp_call_function_single(cpu, ipi_latency_func, NULL, 1); } etime = ktime_get(); delta = ktime_sub(etime, stime); printk("%s ipi from cpu%d to cpu0-95 delta of 1000times:%lld\n", __func__, start, delta); return 0; } module_init(ipi_latency_init); static void ipi_latency_exit(void) { } module_exit(ipi_latency_exit); MODULE_DESCRIPTION("IPI benchmark"); MODULE_LICENSE("GPL"); run the below commands 10 times on both Vanilla and the kernel with this patch: # taskset -c 0 insmod test.ko # rmmod test The result on vanilla: ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:126757449 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:126784249 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:126177703 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:127022281 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:126184883 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:127374585 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:125778089 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:126974441 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:127357625 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:126228184 The result on the kernel with this patch: ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:124467401 
ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:123474209 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:123558497 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:122993951 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:122984223 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:123323609 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:124507583 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:123386963 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:123340664 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:123285324 Signed-off-by: Barry Song [maz: tidied up commit message] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220220061910.6155-1-21cnbao@gmail.com --- drivers/irqchip/irq-gic-v3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 5e935d97207dc..0efe1a9a9f3b2 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1211,7 +1211,7 @@ static void gic_ipi_send_mask(struct irq_data *d, const struct cpumask *mask) * Ensure that stores to Normal memory are visible to the * other CPUs before issuing the IPI. */ - wmb(); + dsb(ishst); for_each_cpu(cpu, mask) { u64 cluster_id = MPIDR_TO_SGI_CLUSTER_ID(cpu_logical_map(cpu)); -- GitLab From a8ab8ef437d48173e166f1f44f202097e91bf81c Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Sun, 6 Feb 2022 06:58:03 -0800 Subject: [PATCH 0536/1586] ACPI: clean up double words in two comments Remove the second 'on' and 'those'. Signed-off-by: Tom Rix [ rjw: Subject adjustments ] Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/Kconfig | 2 +- drivers/acpi/pci_link.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 273741dedfd20..1e34f846508fb 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -302,7 +302,7 @@ config ACPI_IPMI help This driver enables the ACPI to access the BMC controller. And it uses the IPMI request/response message to communicate with BMC - controller, which can be found on on the server. + controller, which can be found on the server. To compile this driver as a module, choose M here: the module will be called as acpi_ipmi. diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c index d54fb8e54671d..58647051c948a 100644 --- a/drivers/acpi/pci_link.c +++ b/drivers/acpi/pci_link.c @@ -185,7 +185,7 @@ static acpi_status acpi_pci_link_check_current(struct acpi_resource *resource, if (!p || !p->interrupt_count) { /* * IRQ descriptors may have no IRQ# bits set, - * particularly those those w/ _STA disabled + * particularly those w/ _STA disabled */ pr_debug("Blank _CRS IRQ resource\n"); return AE_OK; -- GitLab From c30c575db4858f0bbe5e315ff2e529c782f33a1f Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 9 Feb 2022 01:56:35 +0100 Subject: [PATCH 0537/1586] random: absorb fast pool into input pool after fast load During crng_init == 0, we never credit entropy in add_interrupt_randomness(), but instead dump it directly into the primary_crng. That's fine, except for the fact that we then wind up throwing away that entropy later when we switch to extracting from the input pool and xoring into (and later in this series overwriting) the primary_crng key. The two other early init sites -- add_hwgenerator_randomness()'s use of crng_fast_load() and add_device_randomness()'s use of crng_slow_load() -- always additionally give their inputs to the input pool. But not add_interrupt_randomness().
This commit fixes that shortcoming by calling mix_pool_bytes() after crng_fast_load() in add_interrupt_randomness(). That's partially verboten on PREEMPT_RT, where it implies taking spinlock_t from an IRQ handler. But this also only happens during early boot and then never again after that. Plus it's a trylock so it has the same considerations as calling crng_fast_load(), which we're already using. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Reviewed-by: Eric Biggers Suggested-by: Eric Biggers Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/char/random.c b/drivers/char/random.c index d31b0b3afe2ed..f3179c67010b9 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -850,6 +850,10 @@ void add_interrupt_randomness(int irq) crng_fast_load((u8 *)fast_pool->pool, sizeof(fast_pool->pool)) > 0) { fast_pool->count = 0; fast_pool->last = now; + if (spin_trylock(&input_pool.lock)) { + _mix_pool_bytes(&fast_pool->pool, sizeof(fast_pool->pool)); + spin_unlock(&input_pool.lock); + } } return; } -- GitLab From 186873c549df11b63e17062f863654e1501e1524 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 7 Feb 2022 15:08:49 +0100 Subject: [PATCH 0538/1586] random: use simpler fast key erasure flow on per-cpu keys MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rather than the clunky NUMA full ChaCha state system we had prior, this commit is closer to the original "fast key erasure RNG" proposal from https://blog.cr.yp.to/20170723-random.html, by simply treating ChaCha keys on a per-cpu basis. All entropy is extracted to a base crng key of 32 bytes. This base crng has a birthdate and a generation counter. When we go to take bytes from the crng, we first check if the birthdate is too old; if it is, we reseed per usual. Then we start working on a per-cpu crng. This per-cpu crng makes sure that it has the same generation counter as the base crng.
If it doesn't, it does fast key erasure with the base crng key and uses the output as its new per-cpu key, and then updates its local generation counter. Then, using this per-cpu state, we do ordinary fast key erasure. Half of this first block is used to overwrite the per-cpu crng key for the next call -- this is the fast key erasure RNG idea -- and the other half, along with the ChaCha state, is returned to the caller. If the caller desires more than this remaining half, it can generate more ChaCha blocks, unlocked, using the now detached ChaCha state that was just returned. Crypto-wise, this is more or less what we were doing before, but this simply makes it more explicit and ensures that we always have backtrack protection by not playing games with a shared block counter. The flow looks like this:

──extract()──► base_crng.key ◄──memcpy()───┐
                   │                       │
                   └──chacha()──────┬─► new_base_key
                                    └─► crngs[n].key ◄──memcpy()───┐
                                              │                    │
                                              └──chacha()───┬─► new_key
                                                            └─► random_bytes
                                                                      │
                                                                      └────►

There are a few hairy details around early init. Just as was done before, prior to having gathered enough entropy, crng_fast_load() and crng_slow_load() dump bytes directly into the base crng, and when we go to take bytes from the crng, in that case, we're doing fast key erasure with the base crng rather than the fast unlocked per-cpu crngs. This is fine as that's only the state of affairs during very early boot; once the crng initializes we never use these paths again. In the process of all this, the APIs into the crng become a bit simpler: we have get_random_bytes(buf, len) and get_random_bytes_user(buf, len), which both do what you'd expect. All of the details of fast key erasure and per-cpu selection happen only in a very short critical section of crng_make_state(), which selects the right per-cpu key, does the fast key erasure, and returns a local state to the caller's stack. So, we no longer have a need for a separate backtrack function, as this happens all at once here.
The API then allows us to extend backtrack protection to batched entropy without really having to do much at all. The result is a bit simpler than before and has fewer foot guns. The init time state machine also gets a lot simpler as we don't need to wait for workqueues to come online and do deferred work. And the multi-core performance should be increased significantly, by virtue of having hardly any locking on the fast path. Cc: Theodore Ts'o Cc: Dominik Brodowski Cc: Sebastian Andrzej Siewior Reviewed-by: Jann Horn Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 395 ++++++++++++++++++++++++------------------ 1 file changed, 229 insertions(+), 166 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index f3179c67010b9..538eaa469f5e4 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -67,63 +67,19 @@ * Exported interfaces ---- kernel output * -------------------------------------- * - * The primary kernel interface is + * The primary kernel interfaces are: * * void get_random_bytes(void *buf, int nbytes); - * - * This interface will return the requested number of random bytes, - * and place it in the requested buffer. This is equivalent to a - * read from /dev/urandom. - * - * For less critical applications, there are the functions: - * * u32 get_random_u32() * u64 get_random_u64() * unsigned int get_random_int() * unsigned long get_random_long() * - * These are produced by a cryptographic RNG seeded from get_random_bytes, - * and so do not deplete the entropy pool as much. These are recommended - * for most in-kernel operations *if the result is going to be stored in - * the kernel*. - * - * Specifically, the get_random_int() family do not attempt to do - * "anti-backtracking". If you capture the state of the kernel (e.g. - * by snapshotting the VM), you can figure out previous get_random_int() - * return values. But if the value is stored in the kernel anyway, - * this is not a problem. 
- * - * It *is* safe to expose get_random_int() output to attackers (e.g. as - * network cookies); given outputs 1..n, it's not feasible to predict - * outputs 0 or n+1. The only concern is an attacker who breaks into - * the kernel later; the get_random_int() engine is not reseeded as - * often as the get_random_bytes() one. - * - * get_random_bytes() is needed for keys that need to stay secret after - * they are erased from the kernel. For example, any key that will - * be wrapped and stored encrypted. And session encryption keys: we'd - * like to know that after the session is closed and the keys erased, - * the plaintext is unrecoverable to someone who recorded the ciphertext. - * - * But for network ports/cookies, stack canaries, PRNG seeds, address - * space layout randomization, session *authentication* keys, or other - * applications where the sensitive data is stored in the kernel in - * plaintext for as long as it's sensitive, the get_random_int() family - * is just fine. - * - * Consider ASLR. We want to keep the address space secret from an - * outside attacker while the process is running, but once the address - * space is torn down, it's of no use to an attacker any more. And it's - * stored in kernel data structures as long as it's alive, so worrying - * about an attacker's ability to extrapolate it from the get_random_int() - * CRNG is silly. - * - * Even some cryptographic keys are safe to generate with get_random_int(). - * In particular, keys for SipHash are generally fine. Here, knowledge - * of the key authorizes you to do something to a kernel object (inject - * packets to a network connection, or flood a hash table), and the - * key is stored with the object being protected. Once it goes away, - * we no longer care if anyone knows the key. + * These interfaces will return the requested number of random bytes + * into the given buffer or as a return value. This is equivalent to a + * read from /dev/urandom. 
The get_random_{u32,u64,int,long}() family + * of functions may be higher performance for one-off random integers, + * because they do a bit of buffering. * * prandom_u32() * ------------- @@ -300,20 +256,6 @@ static struct fasync_struct *fasync; static DEFINE_SPINLOCK(random_ready_list_lock); static LIST_HEAD(random_ready_list); -struct crng_state { - u32 state[16]; - unsigned long init_time; - spinlock_t lock; -}; - -static struct crng_state primary_crng = { - .lock = __SPIN_LOCK_UNLOCKED(primary_crng.lock), - .state[0] = CHACHA_CONSTANT_EXPA, - .state[1] = CHACHA_CONSTANT_ND_3, - .state[2] = CHACHA_CONSTANT_2_BY, - .state[3] = CHACHA_CONSTANT_TE_K, -}; - /* * crng_init = 0 --> Uninitialized * 1 --> Initialized @@ -325,9 +267,6 @@ static struct crng_state primary_crng = { static int crng_init = 0; #define crng_ready() (likely(crng_init > 1)) static int crng_init_cnt = 0; -#define CRNG_INIT_CNT_THRESH (2 * CHACHA_KEY_SIZE) -static void extract_crng(u8 out[CHACHA_BLOCK_SIZE]); -static void crng_backtrack_protect(u8 tmp[CHACHA_BLOCK_SIZE], int used); static void process_random_ready_list(void); static void _get_random_bytes(void *buf, int nbytes); @@ -470,7 +409,30 @@ static void credit_entropy_bits(int nbits) * *********************************************************************/ -#define CRNG_RESEED_INTERVAL (300 * HZ) +enum { + CRNG_RESEED_INTERVAL = 300 * HZ, + CRNG_INIT_CNT_THRESH = 2 * CHACHA_KEY_SIZE +}; + +static struct { + u8 key[CHACHA_KEY_SIZE] __aligned(__alignof__(long)); + unsigned long birth; + unsigned long generation; + spinlock_t lock; +} base_crng = { + .lock = __SPIN_LOCK_UNLOCKED(base_crng.lock) +}; + +struct crng { + u8 key[CHACHA_KEY_SIZE]; + unsigned long generation; + local_lock_t lock; +}; + +static DEFINE_PER_CPU(struct crng, crngs) = { + .generation = ULONG_MAX, + .lock = INIT_LOCAL_LOCK(crngs.lock), +}; static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait); @@ -487,22 +449,22 @@ static size_t crng_fast_load(const u8 *cp, size_t len) u8 *p; 
size_t ret = 0; - if (!spin_trylock_irqsave(&primary_crng.lock, flags)) + if (!spin_trylock_irqsave(&base_crng.lock, flags)) return 0; if (crng_init != 0) { - spin_unlock_irqrestore(&primary_crng.lock, flags); + spin_unlock_irqrestore(&base_crng.lock, flags); return 0; } - p = (u8 *)&primary_crng.state[4]; + p = base_crng.key; while (len > 0 && crng_init_cnt < CRNG_INIT_CNT_THRESH) { - p[crng_init_cnt % CHACHA_KEY_SIZE] ^= *cp; + p[crng_init_cnt % sizeof(base_crng.key)] ^= *cp; cp++; crng_init_cnt++; len--; ret++; } if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) { invalidate_batched_entropy(); crng_init = 1; } - spin_unlock_irqrestore(&primary_crng.lock, flags); + spin_unlock_irqrestore(&base_crng.lock, flags); if (crng_init == 1) pr_notice("fast init done\n"); return ret; @@ -527,14 +489,14 @@ static int crng_slow_load(const u8 *cp, size_t len) unsigned long flags; static u8 lfsr = 1; u8 tmp; - unsigned int i, max = CHACHA_KEY_SIZE; + unsigned int i, max = sizeof(base_crng.key); const u8 *src_buf = cp; - u8 *dest_buf = (u8 *)&primary_crng.state[4]; + u8 *dest_buf = base_crng.key; - if (!spin_trylock_irqsave(&primary_crng.lock, flags)) + if (!spin_trylock_irqsave(&base_crng.lock, flags)) return 0; if (crng_init != 0) { - spin_unlock_irqrestore(&primary_crng.lock, flags); + spin_unlock_irqrestore(&base_crng.lock, flags); return 0; } if (len > max) @@ -545,38 +507,50 @@ static int crng_slow_load(const u8 *cp, size_t len) lfsr >>= 1; if (tmp & 1) lfsr ^= 0xE1; - tmp = dest_buf[i % CHACHA_KEY_SIZE]; - dest_buf[i % CHACHA_KEY_SIZE] ^= src_buf[i % len] ^ lfsr; + tmp = dest_buf[i % sizeof(base_crng.key)]; + dest_buf[i % sizeof(base_crng.key)] ^= src_buf[i % len] ^ lfsr; lfsr += (tmp << 3) | (tmp >> 5); } - spin_unlock_irqrestore(&primary_crng.lock, flags); + spin_unlock_irqrestore(&base_crng.lock, flags); return 1; } static void crng_reseed(void) { unsigned long flags; - int i, entropy_count; - union { - u8 block[CHACHA_BLOCK_SIZE]; - u32 key[8]; - } buf; + int entropy_count; 
+ unsigned long next_gen; + u8 key[CHACHA_KEY_SIZE]; + /* + * First we make sure we have POOL_MIN_BITS of entropy in the pool, + * and then we drain all of it. Only then can we extract a new key. + */ do { entropy_count = READ_ONCE(input_pool.entropy_count); if (entropy_count < POOL_MIN_BITS) return; } while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count); - extract_entropy(buf.key, sizeof(buf.key)); + extract_entropy(key, sizeof(key)); wake_up_interruptible(&random_write_wait); kill_fasync(&fasync, SIGIO, POLL_OUT); - spin_lock_irqsave(&primary_crng.lock, flags); - for (i = 0; i < 8; i++) - primary_crng.state[i + 4] ^= buf.key[i]; - memzero_explicit(&buf, sizeof(buf)); - WRITE_ONCE(primary_crng.init_time, jiffies); - spin_unlock_irqrestore(&primary_crng.lock, flags); + /* + * We copy the new key into the base_crng, overwriting the old one, + * and update the generation counter. We avoid hitting ULONG_MAX, + * because the per-cpu crngs are initialized to ULONG_MAX, so this + * forces new CPUs that come online to always initialize. + */ + spin_lock_irqsave(&base_crng.lock, flags); + memcpy(base_crng.key, key, sizeof(base_crng.key)); + next_gen = base_crng.generation + 1; + if (next_gen == ULONG_MAX) + ++next_gen; + WRITE_ONCE(base_crng.generation, next_gen); + WRITE_ONCE(base_crng.birth, jiffies); + spin_unlock_irqrestore(&base_crng.lock, flags); + memzero_explicit(key, sizeof(key)); + if (crng_init < 2) { invalidate_batched_entropy(); crng_init = 2; @@ -597,77 +571,143 @@ static void crng_reseed(void) } } -static void extract_crng(u8 out[CHACHA_BLOCK_SIZE]) +/* + * The general form here is based on a "fast key erasure RNG" from + * https://blog.cr.yp.to/20170723-random.html. It generates a ChaCha + * block using the provided key, and then immediately overwites that + * key with half the block.
It returns the resultant ChaCha state to the + * user, along with the second half of the block containing 32 bytes of + * random data that may be used; random_data_len may not be greater than + * 32. + */ +static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE], + u32 chacha_state[CHACHA_STATE_WORDS], + u8 *random_data, size_t random_data_len) { - unsigned long flags, init_time; + u8 first_block[CHACHA_BLOCK_SIZE]; - if (crng_ready()) { - init_time = READ_ONCE(primary_crng.init_time); - if (time_after(jiffies, init_time + CRNG_RESEED_INTERVAL)) - crng_reseed(); - } - spin_lock_irqsave(&primary_crng.lock, flags); - chacha20_block(&primary_crng.state[0], out); - if (primary_crng.state[12] == 0) - primary_crng.state[13]++; - spin_unlock_irqrestore(&primary_crng.lock, flags); + BUG_ON(random_data_len > 32); + + chacha_init_consts(chacha_state); + memcpy(&chacha_state[4], key, CHACHA_KEY_SIZE); + memset(&chacha_state[12], 0, sizeof(u32) * 4); + chacha20_block(chacha_state, first_block); + + memcpy(key, first_block, CHACHA_KEY_SIZE); + memcpy(random_data, first_block + CHACHA_KEY_SIZE, random_data_len); + memzero_explicit(first_block, sizeof(first_block)); } /* - * Use the leftover bytes from the CRNG block output (if there is - * enough) to mutate the CRNG key to provide backtracking protection. + * This function returns a ChaCha state that you may use for generating + * random data. It also returns up to 32 bytes on its own of random data + * that may be used; random_data_len may not be greater than 32. 
*/ -static void crng_backtrack_protect(u8 tmp[CHACHA_BLOCK_SIZE], int used) +static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS], + u8 *random_data, size_t random_data_len) { unsigned long flags; - u32 *s, *d; - int i; + struct crng *crng; - used = round_up(used, sizeof(u32)); - if (used + CHACHA_KEY_SIZE > CHACHA_BLOCK_SIZE) { - extract_crng(tmp); - used = 0; + BUG_ON(random_data_len > 32); + + /* + * For the fast path, we check whether we're ready, unlocked first, and + * then re-check once locked later. In the case where we're really not + * ready, we do fast key erasure with the base_crng directly, because + * this is what crng_{fast,slow}_load mutate during early init. + */ + if (unlikely(!crng_ready())) { + bool ready; + + spin_lock_irqsave(&base_crng.lock, flags); + ready = crng_ready(); + if (!ready) + crng_fast_key_erasure(base_crng.key, chacha_state, + random_data, random_data_len); + spin_unlock_irqrestore(&base_crng.lock, flags); + if (!ready) + return; } - spin_lock_irqsave(&primary_crng.lock, flags); - s = (u32 *)&tmp[used]; - d = &primary_crng.state[4]; - for (i = 0; i < 8; i++) - *d++ ^= *s++; - spin_unlock_irqrestore(&primary_crng.lock, flags); + + /* + * If the base_crng is more than 5 minutes old, we reseed, which + * in turn bumps the generation counter that we check below. + */ + if (unlikely(time_after(jiffies, READ_ONCE(base_crng.birth) + CRNG_RESEED_INTERVAL))) + crng_reseed(); + + local_lock_irqsave(&crngs.lock, flags); + crng = raw_cpu_ptr(&crngs); + + /* + * If our per-cpu crng is older than the base_crng, then it means + * somebody reseeded the base_crng. In that case, we do fast key + * erasure on the base_crng, and use its output as the new key + * for our per-cpu crng. This brings us up to date with base_crng. 
+ */ + if (unlikely(crng->generation != READ_ONCE(base_crng.generation))) { + spin_lock(&base_crng.lock); + crng_fast_key_erasure(base_crng.key, chacha_state, + crng->key, sizeof(crng->key)); + crng->generation = base_crng.generation; + spin_unlock(&base_crng.lock); + } + + /* + * Finally, when we've made it this far, our per-cpu crng has an up + * to date key, and we can do fast key erasure with it to produce + * some random data and a ChaCha state for the caller. All other + * branches of this function are "unlikely", so most of the time we + * should wind up here immediately. + */ + crng_fast_key_erasure(crng->key, chacha_state, random_data, random_data_len); + local_unlock_irqrestore(&crngs.lock, flags); } -static ssize_t extract_crng_user(void __user *buf, size_t nbytes) +static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) { - ssize_t ret = 0, i = CHACHA_BLOCK_SIZE; - u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4); - int large_request = (nbytes > 256); + bool large_request = nbytes > 256; + ssize_t ret = 0, len; + u32 chacha_state[CHACHA_STATE_WORDS]; + u8 output[CHACHA_BLOCK_SIZE]; + + if (!nbytes) + return 0; + + len = min_t(ssize_t, 32, nbytes); + crng_make_state(chacha_state, output, len); + + if (copy_to_user(buf, output, len)) + return -EFAULT; + nbytes -= len; + buf += len; + ret += len; while (nbytes) { if (large_request && need_resched()) { - if (signal_pending(current)) { - if (ret == 0) - ret = -ERESTARTSYS; + if (signal_pending(current)) break; - } schedule(); } - extract_crng(tmp); - i = min_t(int, nbytes, CHACHA_BLOCK_SIZE); - if (copy_to_user(buf, tmp, i)) { + chacha20_block(chacha_state, output); + if (unlikely(chacha_state[12] == 0)) + ++chacha_state[13]; + + len = min_t(ssize_t, nbytes, CHACHA_BLOCK_SIZE); + if (copy_to_user(buf, output, len)) { ret = -EFAULT; break; } - nbytes -= i; - buf += i; - ret += i; + nbytes -= len; + buf += len; + ret += len; } - crng_backtrack_protect(tmp, i); - - /* Wipe data just written to memory */ - 
memzero_explicit(tmp, sizeof(tmp)); + memzero_explicit(chacha_state, sizeof(chacha_state)); + memzero_explicit(output, sizeof(output)); return ret; } @@ -976,23 +1016,36 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, void */ static void _get_random_bytes(void *buf, int nbytes) { - u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4); + u32 chacha_state[CHACHA_STATE_WORDS]; + u8 tmp[CHACHA_BLOCK_SIZE]; + ssize_t len; trace_get_random_bytes(nbytes, _RET_IP_); - while (nbytes >= CHACHA_BLOCK_SIZE) { - extract_crng(buf); - buf += CHACHA_BLOCK_SIZE; + if (!nbytes) + return; + + len = min_t(ssize_t, 32, nbytes); + crng_make_state(chacha_state, buf, len); + nbytes -= len; + buf += len; + + while (nbytes) { + if (nbytes < CHACHA_BLOCK_SIZE) { + chacha20_block(chacha_state, tmp); + memcpy(buf, tmp, nbytes); + memzero_explicit(tmp, sizeof(tmp)); + break; + } + + chacha20_block(chacha_state, buf); + if (unlikely(chacha_state[12] == 0)) + ++chacha_state[13]; nbytes -= CHACHA_BLOCK_SIZE; + buf += CHACHA_BLOCK_SIZE; } - if (nbytes > 0) { - extract_crng(tmp); - memcpy(buf, tmp, nbytes); - crng_backtrack_protect(tmp, nbytes); - } else - crng_backtrack_protect(tmp, CHACHA_BLOCK_SIZE); - memzero_explicit(tmp, sizeof(tmp)); + memzero_explicit(chacha_state, sizeof(chacha_state)); } void get_random_bytes(void *buf, int nbytes) @@ -1223,13 +1276,12 @@ int __init rand_initialize(void) mix_pool_bytes(&now, sizeof(now)); mix_pool_bytes(utsname(), sizeof(*(utsname()))); - extract_entropy(&primary_crng.state[4], sizeof(u32) * 12); + extract_entropy(base_crng.key, sizeof(base_crng.key)); if (arch_init && trust_cpu && crng_init < 2) { invalidate_batched_entropy(); crng_init = 2; pr_notice("crng init done (trusting CPU's manufacturer)\n"); } - primary_crng.init_time = jiffies - CRNG_RESEED_INTERVAL - 1; if (ratelimit_disable) { urandom_warning.interval = 0; @@ -1261,7 +1313,7 @@ static ssize_t urandom_read_nowarn(struct file *file, char __user *buf, int ret; nbytes = 
min_t(size_t, nbytes, INT_MAX >> 6); - ret = extract_crng_user(buf, nbytes); + ret = get_random_bytes_user(buf, nbytes); trace_urandom_read(8 * nbytes, 0, input_pool.entropy_count); return ret; } @@ -1577,8 +1629,15 @@ static atomic_t batch_generation = ATOMIC_INIT(0); struct batched_entropy { union { - u64 entropy_u64[CHACHA_BLOCK_SIZE / sizeof(u64)]; - u32 entropy_u32[CHACHA_BLOCK_SIZE / sizeof(u32)]; + /* + * We make this 1.5x a ChaCha block, so that we get the + * remaining 32 bytes from fast key erasure, plus one full + * block from the detached ChaCha state. We can increase + * the size of this later if needed so long as we keep the + * formula of (integer_blocks + 0.5) * CHACHA_BLOCK_SIZE. + */ + u64 entropy_u64[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(u64))]; + u32 entropy_u32[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(u32))]; }; local_lock_t lock; unsigned int position; @@ -1587,14 +1646,13 @@ struct batched_entropy { /* * Get a random word for internal kernel use only. The quality of the random - * number is good as /dev/urandom, but there is no backtrack protection, with - * the goal of being quite fast and not depleting entropy. In order to ensure - * that the randomness provided by this function is okay, the function - * wait_for_random_bytes() should be called and return 0 at least once at any - * point prior. + * number is good as /dev/urandom. In order to ensure that the randomness + * provided by this function is okay, the function wait_for_random_bytes() + * should be called and return 0 at least once at any point prior. 
*/ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = { - .lock = INIT_LOCAL_LOCK(batched_entropy_u64.lock) + .lock = INIT_LOCAL_LOCK(batched_entropy_u64.lock), + .position = UINT_MAX }; u64 get_random_u64(void) @@ -1611,21 +1669,24 @@ u64 get_random_u64(void) batch = raw_cpu_ptr(&batched_entropy_u64); next_gen = atomic_read(&batch_generation); - if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0 || + if (batch->position >= ARRAY_SIZE(batch->entropy_u64) || next_gen != batch->generation) { - extract_crng((u8 *)batch->entropy_u64); + _get_random_bytes(batch->entropy_u64, sizeof(batch->entropy_u64)); batch->position = 0; batch->generation = next_gen; } - ret = batch->entropy_u64[batch->position++]; + ret = batch->entropy_u64[batch->position]; + batch->entropy_u64[batch->position] = 0; + ++batch->position; local_unlock_irqrestore(&batched_entropy_u64.lock, flags); return ret; } EXPORT_SYMBOL(get_random_u64); static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32) = { - .lock = INIT_LOCAL_LOCK(batched_entropy_u32.lock) + .lock = INIT_LOCAL_LOCK(batched_entropy_u32.lock), + .position = UINT_MAX }; u32 get_random_u32(void) @@ -1642,14 +1703,16 @@ u32 get_random_u32(void) batch = raw_cpu_ptr(&batched_entropy_u32); next_gen = atomic_read(&batch_generation); - if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0 || + if (batch->position >= ARRAY_SIZE(batch->entropy_u32) || next_gen != batch->generation) { - extract_crng((u8 *)batch->entropy_u32); + _get_random_bytes(batch->entropy_u32, sizeof(batch->entropy_u32)); batch->position = 0; batch->generation = next_gen; } - ret = batch->entropy_u32[batch->position++]; + ret = batch->entropy_u32[batch->position]; + batch->entropy_u32[batch->position] = 0; + ++batch->position; local_unlock_irqrestore(&batched_entropy_u32.lock, flags); return ret; } -- GitLab From 66e4c2b9541503d721e936cc3898c9f25f4591ff Mon Sep 17 00:00:00 2001 From: "Jason A. 
Donenfeld" Date: Tue, 8 Feb 2022 19:23:17 +0100 Subject: [PATCH 0539/1586] random: use hash function for crng_slow_load() Since we have a hash function that's really fast, and the goal of crng_slow_load() is reportedly to "touch all of the crng's state", we can just hash the old state together with the new state and call it a day. This way we don't need to reason about another LFSR or worry about various attacks there. This code is only ever used at early boot and then never again. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 42 +++++++++++++++--------------------------- 1 file changed, 15 insertions(+), 27 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 538eaa469f5e4..630b9b9e7d256 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -477,42 +477,30 @@ static size_t crng_fast_load(const u8 *cp, size_t len) * all), and (2) it doesn't have the performance constraints of * crng_fast_load(). * - * So we do something more comprehensive which is guaranteed to touch - * all of the primary_crng's state, and which uses a LFSR with a - * period of 255 as part of the mixing algorithm. Finally, we do - * *not* advance crng_init_cnt since buffer we may get may be something - * like a fixed DMI table (for example), which might very well be - * unique to the machine, but is otherwise unvarying. + * So, we simply hash the contents in with the current key. Finally, + * we do *not* advance crng_init_cnt since buffer we may get may be + * something like a fixed DMI table (for example), which might very + * well be unique to the machine, but is otherwise unvarying. 
*/ -static int crng_slow_load(const u8 *cp, size_t len) +static void crng_slow_load(const u8 *cp, size_t len) { unsigned long flags; - static u8 lfsr = 1; - u8 tmp; - unsigned int i, max = sizeof(base_crng.key); - const u8 *src_buf = cp; - u8 *dest_buf = base_crng.key; + struct blake2s_state hash; + + blake2s_init(&hash, sizeof(base_crng.key)); if (!spin_trylock_irqsave(&base_crng.lock, flags)) - return 0; + return; if (crng_init != 0) { spin_unlock_irqrestore(&base_crng.lock, flags); - return 0; - } - if (len > max) - max = len; - - for (i = 0; i < max; i++) { - tmp = lfsr; - lfsr >>= 1; - if (tmp & 1) - lfsr ^= 0xE1; - tmp = dest_buf[i % sizeof(base_crng.key)]; - dest_buf[i % sizeof(base_crng.key)] ^= src_buf[i % len] ^ lfsr; - lfsr += (tmp << 3) | (tmp >> 5); + return; } + + blake2s_update(&hash, base_crng.key, sizeof(base_crng.key)); + blake2s_update(&hash, cp, len); + blake2s_final(&hash, base_crng.key); + spin_unlock_irqrestore(&base_crng.lock, flags); - return 1; } static void crng_reseed(void) -- GitLab From 04ec96b768c9dd43946b047c3da60dcc66431370 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 9 Feb 2022 14:43:25 +0100 Subject: [PATCH 0540/1586] random: make more consistent use of integer types We've been using a flurry of int, unsigned int, size_t, and ssize_t. Let's unify all of this into size_t where it makes sense, as it does in most places, and leave ssize_t for return values with possible errors. In addition, keeping with the convention of other functions in this file, functions that are dealing with raw bytes now take void * consistently instead of a mix of that and u8 *, because much of the time we're actually passing some other structure that is then interpreted as bytes by the function. We also take the opportunity to fix the outdated and incorrect comment in get_random_bytes_arch(). Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Reviewed-by: Jann Horn Reviewed-by: Eric Biggers Signed-off-by: Jason A. 
Donenfeld --- drivers/char/random.c | 123 +++++++++++++++------------------- include/linux/hw_random.h | 2 +- include/linux/random.h | 10 +-- include/trace/events/random.h | 79 +++++++++++----------- 4 files changed, 99 insertions(+), 115 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 630b9b9e7d256..768dee5e081a5 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -69,7 +69,7 @@ * * The primary kernel interfaces are: * - * void get_random_bytes(void *buf, int nbytes); + * void get_random_bytes(void *buf, size_t nbytes); * u32 get_random_u32() * u64 get_random_u64() * unsigned int get_random_int() @@ -97,14 +97,14 @@ * The current exported interfaces for gathering environmental noise * from the devices are: * - * void add_device_randomness(const void *buf, unsigned int size); + * void add_device_randomness(const void *buf, size_t size); * void add_input_randomness(unsigned int type, unsigned int code, * unsigned int value); * void add_interrupt_randomness(int irq); * void add_disk_randomness(struct gendisk *disk); - * void add_hwgenerator_randomness(const char *buffer, size_t count, + * void add_hwgenerator_randomness(const void *buffer, size_t count, * size_t entropy); - * void add_bootloader_randomness(const void *buf, unsigned int size); + * void add_bootloader_randomness(const void *buf, size_t size); * * add_device_randomness() is for adding data to the random pool that * is likely to differ between two devices (or possibly even per boot). 
@@ -268,7 +268,7 @@ static int crng_init = 0; #define crng_ready() (likely(crng_init > 1)) static int crng_init_cnt = 0; static void process_random_ready_list(void); -static void _get_random_bytes(void *buf, int nbytes); +static void _get_random_bytes(void *buf, size_t nbytes); static struct ratelimit_state unseeded_warning = RATELIMIT_STATE_INIT("warn_unseeded_randomness", HZ, 3); @@ -290,7 +290,7 @@ MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression"); static struct { struct blake2s_state hash; spinlock_t lock; - int entropy_count; + unsigned int entropy_count; } input_pool = { .hash.h = { BLAKE2S_IV0 ^ (0x01010000 | BLAKE2S_HASH_SIZE), BLAKE2S_IV1, BLAKE2S_IV2, BLAKE2S_IV3, BLAKE2S_IV4, @@ -308,18 +308,12 @@ static void crng_reseed(void); * update the entropy estimate. The caller should call * credit_entropy_bits if this is appropriate. */ -static void _mix_pool_bytes(const void *in, int nbytes) +static void _mix_pool_bytes(const void *in, size_t nbytes) { blake2s_update(&input_pool.hash, in, nbytes); } -static void __mix_pool_bytes(const void *in, int nbytes) -{ - trace_mix_pool_bytes_nolock(nbytes, _RET_IP_); - _mix_pool_bytes(in, nbytes); -} - -static void mix_pool_bytes(const void *in, int nbytes) +static void mix_pool_bytes(const void *in, size_t nbytes) { unsigned long flags; @@ -383,18 +377,18 @@ static void process_random_ready_list(void) spin_unlock_irqrestore(&random_ready_list_lock, flags); } -static void credit_entropy_bits(int nbits) +static void credit_entropy_bits(size_t nbits) { - int entropy_count, orig; + unsigned int entropy_count, orig, add; - if (nbits <= 0) + if (!nbits) return; - nbits = min(nbits, POOL_BITS); + add = min_t(size_t, nbits, POOL_BITS); do { orig = READ_ONCE(input_pool.entropy_count); - entropy_count = min(POOL_BITS, orig + nbits); + entropy_count = min_t(unsigned int, POOL_BITS, orig + add); } while (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig); trace_credit_entropy_bits(nbits, 
entropy_count, _RET_IP_); @@ -443,10 +437,10 @@ static void invalidate_batched_entropy(void); * path. So we can't afford to dilly-dally. Returns the number of * bytes processed from cp. */ -static size_t crng_fast_load(const u8 *cp, size_t len) +static size_t crng_fast_load(const void *cp, size_t len) { unsigned long flags; - u8 *p; + const u8 *src = (const u8 *)cp; size_t ret = 0; if (!spin_trylock_irqsave(&base_crng.lock, flags)) @@ -455,10 +449,9 @@ static size_t crng_fast_load(const u8 *cp, size_t len) spin_unlock_irqrestore(&base_crng.lock, flags); return 0; } - p = base_crng.key; while (len > 0 && crng_init_cnt < CRNG_INIT_CNT_THRESH) { - p[crng_init_cnt % sizeof(base_crng.key)] ^= *cp; - cp++; crng_init_cnt++; len--; ret++; + base_crng.key[crng_init_cnt % sizeof(base_crng.key)] ^= *src; + src++; crng_init_cnt++; len--; ret++; } if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) { invalidate_batched_entropy(); @@ -482,7 +475,7 @@ static size_t crng_fast_load(const u8 *cp, size_t len) * something like a fixed DMI table (for example), which might very * well be unique to the machine, but is otherwise unvarying. 
*/ -static void crng_slow_load(const u8 *cp, size_t len) +static void crng_slow_load(const void *cp, size_t len) { unsigned long flags; struct blake2s_state hash; @@ -656,14 +649,15 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS], static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) { bool large_request = nbytes > 256; - ssize_t ret = 0, len; + ssize_t ret = 0; + size_t len; u32 chacha_state[CHACHA_STATE_WORDS]; u8 output[CHACHA_BLOCK_SIZE]; if (!nbytes) return 0; - len = min_t(ssize_t, 32, nbytes); + len = min_t(size_t, 32, nbytes); crng_make_state(chacha_state, output, len); if (copy_to_user(buf, output, len)) @@ -683,7 +677,7 @@ static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) if (unlikely(chacha_state[12] == 0)) ++chacha_state[13]; - len = min_t(ssize_t, nbytes, CHACHA_BLOCK_SIZE); + len = min_t(size_t, nbytes, CHACHA_BLOCK_SIZE); if (copy_to_user(buf, output, len)) { ret = -EFAULT; break; @@ -721,7 +715,7 @@ struct timer_rand_state { * the entropy pool having similar initial state across largely * identical devices. */ -void add_device_randomness(const void *buf, unsigned int size) +void add_device_randomness(const void *buf, size_t size) { unsigned long time = random_get_entropy() ^ jiffies; unsigned long flags; @@ -749,7 +743,7 @@ static struct timer_rand_state input_timer_state = INIT_TIMER_RAND_STATE; * keyboard scan codes, and 256 upwards for interrupts. * */ -static void add_timer_randomness(struct timer_rand_state *state, unsigned num) +static void add_timer_randomness(struct timer_rand_state *state, unsigned int num) { struct { long jiffies; @@ -793,7 +787,7 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) * Round down by 1 bit on general principles, * and limit entropy estimate to 12 bits. 
*/ - credit_entropy_bits(min_t(int, fls(delta >> 1), 11)); + credit_entropy_bits(min_t(unsigned int, fls(delta >> 1), 11)); } void add_input_randomness(unsigned int type, unsigned int code, @@ -874,8 +868,8 @@ void add_interrupt_randomness(int irq) add_interrupt_bench(cycles); if (unlikely(crng_init == 0)) { - if ((fast_pool->count >= 64) && - crng_fast_load((u8 *)fast_pool->pool, sizeof(fast_pool->pool)) > 0) { + if (fast_pool->count >= 64 && + crng_fast_load(fast_pool->pool, sizeof(fast_pool->pool)) > 0) { fast_pool->count = 0; fast_pool->last = now; if (spin_trylock(&input_pool.lock)) { @@ -893,7 +887,7 @@ void add_interrupt_randomness(int irq) return; fast_pool->last = now; - __mix_pool_bytes(&fast_pool->pool, sizeof(fast_pool->pool)); + _mix_pool_bytes(&fast_pool->pool, sizeof(fast_pool->pool)); spin_unlock(&input_pool.lock); fast_pool->count = 0; @@ -1002,18 +996,18 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, void * wait_for_random_bytes() should be called and return 0 at least once * at any point prior. */ -static void _get_random_bytes(void *buf, int nbytes) +static void _get_random_bytes(void *buf, size_t nbytes) { u32 chacha_state[CHACHA_STATE_WORDS]; u8 tmp[CHACHA_BLOCK_SIZE]; - ssize_t len; + size_t len; trace_get_random_bytes(nbytes, _RET_IP_); if (!nbytes) return; - len = min_t(ssize_t, 32, nbytes); + len = min_t(size_t, 32, nbytes); crng_make_state(chacha_state, buf, len); nbytes -= len; buf += len; @@ -1036,7 +1030,7 @@ static void _get_random_bytes(void *buf, int nbytes) memzero_explicit(chacha_state, sizeof(chacha_state)); } -void get_random_bytes(void *buf, int nbytes) +void get_random_bytes(void *buf, size_t nbytes) { static void *previous; @@ -1197,25 +1191,19 @@ EXPORT_SYMBOL(del_random_ready_callback); /* * This function will use the architecture-specific hardware random - * number generator if it is available. 
The arch-specific hw RNG will - * almost certainly be faster than what we can do in software, but it - * is impossible to verify that it is implemented securely (as - * opposed, to, say, the AES encryption of a sequence number using a - * key known by the NSA). So it's useful if we need the speed, but - * only if we're willing to trust the hardware manufacturer not to - * have put in a back door. - * - * Return number of bytes filled in. + * number generator if it is available. It is not recommended for + * use. Use get_random_bytes() instead. It returns the number of + * bytes filled in. */ -int __must_check get_random_bytes_arch(void *buf, int nbytes) +size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes) { - int left = nbytes; + size_t left = nbytes; u8 *p = buf; trace_get_random_bytes_arch(left, _RET_IP_); while (left) { unsigned long v; - int chunk = min_t(int, left, sizeof(unsigned long)); + size_t chunk = min_t(size_t, left, sizeof(unsigned long)); if (!arch_get_random_long(&v)) break; @@ -1248,12 +1236,12 @@ early_param("random.trust_cpu", parse_trust_cpu); */ int __init rand_initialize(void) { - int i; + size_t i; ktime_t now = ktime_get_real(); bool arch_init = true; unsigned long rv; - for (i = BLAKE2S_BLOCK_SIZE; i > 0; i -= sizeof(rv)) { + for (i = 0; i < BLAKE2S_BLOCK_SIZE; i += sizeof(rv)) { if (!arch_get_random_seed_long_early(&rv) && !arch_get_random_long_early(&rv)) { rv = random_get_entropy(); @@ -1302,7 +1290,7 @@ static ssize_t urandom_read_nowarn(struct file *file, char __user *buf, nbytes = min_t(size_t, nbytes, INT_MAX >> 6); ret = get_random_bytes_user(buf, nbytes); - trace_urandom_read(8 * nbytes, 0, input_pool.entropy_count); + trace_urandom_read(nbytes, input_pool.entropy_count); return ret; } @@ -1346,19 +1334,18 @@ static __poll_t random_poll(struct file *file, poll_table *wait) return mask; } -static int write_pool(const char __user *buffer, size_t count) +static int write_pool(const char __user *ubuf, size_t count) { - 
size_t bytes; - u8 buf[BLAKE2S_BLOCK_SIZE]; - const char __user *p = buffer; + size_t len; + u8 block[BLAKE2S_BLOCK_SIZE]; - while (count > 0) { - bytes = min(count, sizeof(buf)); - if (copy_from_user(buf, p, bytes)) + while (count) { + len = min(count, sizeof(block)); + if (copy_from_user(block, ubuf, len)) return -EFAULT; - count -= bytes; - p += bytes; - mix_pool_bytes(buf, bytes); + count -= len; + ubuf += len; + mix_pool_bytes(block, len); cond_resched(); } @@ -1368,7 +1355,7 @@ static int write_pool(const char __user *buffer, size_t count) static ssize_t random_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { - size_t ret; + int ret; ret = write_pool(buffer, count); if (ret) @@ -1464,8 +1451,6 @@ const struct file_operations urandom_fops = { SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int, flags) { - int ret; - if (flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE)) return -EINVAL; @@ -1480,6 +1465,8 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int, count = INT_MAX; if (!(flags & GRND_INSECURE) && !crng_ready()) { + int ret; + if (flags & GRND_NONBLOCK) return -EAGAIN; ret = wait_for_random_bytes(); @@ -1751,7 +1738,7 @@ unsigned long randomize_page(unsigned long start, unsigned long range) * Those devices may produce endless random bits and will be throttled * when our pool is full. */ -void add_hwgenerator_randomness(const char *buffer, size_t count, +void add_hwgenerator_randomness(const void *buffer, size_t count, size_t entropy) { if (unlikely(crng_init == 0)) { @@ -1782,7 +1769,7 @@ EXPORT_SYMBOL_GPL(add_hwgenerator_randomness); * it would be regarded as device data. * The decision is controlled by CONFIG_RANDOM_TRUST_BOOTLOADER. 
*/ -void add_bootloader_randomness(const void *buf, unsigned int size) +void add_bootloader_randomness(const void *buf, size_t size) { if (IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER)) add_hwgenerator_randomness(buf, size, size * 8); diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index 8e6dd908da216..1a9fc38f8938c 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -61,6 +61,6 @@ extern int devm_hwrng_register(struct device *dev, struct hwrng *rng); extern void hwrng_unregister(struct hwrng *rng); extern void devm_hwrng_unregister(struct device *dve, struct hwrng *rng); /** Feed random bits into the pool. */ -extern void add_hwgenerator_randomness(const char *buffer, size_t count, size_t entropy); +extern void add_hwgenerator_randomness(const void *buffer, size_t count, size_t entropy); #endif /* LINUX_HWRANDOM_H_ */ diff --git a/include/linux/random.h b/include/linux/random.h index c45b2693e51fb..e92efb39779cc 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -20,8 +20,8 @@ struct random_ready_callback { struct module *owner; }; -extern void add_device_randomness(const void *, unsigned int); -extern void add_bootloader_randomness(const void *, unsigned int); +extern void add_device_randomness(const void *, size_t); +extern void add_bootloader_randomness(const void *, size_t); #if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__) static inline void add_latent_entropy(void) @@ -37,13 +37,13 @@ extern void add_input_randomness(unsigned int type, unsigned int code, unsigned int value) __latent_entropy; extern void add_interrupt_randomness(int irq) __latent_entropy; -extern void get_random_bytes(void *buf, int nbytes); +extern void get_random_bytes(void *buf, size_t nbytes); extern int wait_for_random_bytes(void); extern int __init rand_initialize(void); extern bool rng_is_initialized(void); extern int add_random_ready_callback(struct random_ready_callback *rdy); extern void 
del_random_ready_callback(struct random_ready_callback *rdy); -extern int __must_check get_random_bytes_arch(void *buf, int nbytes); +extern size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes); #ifndef MODULE extern const struct file_operations random_fops, urandom_fops; @@ -87,7 +87,7 @@ static inline unsigned long get_random_canary(void) /* Calls wait_for_random_bytes() and then calls get_random_bytes(buf, nbytes). * Returns the result of the call to wait_for_random_bytes. */ -static inline int get_random_bytes_wait(void *buf, int nbytes) +static inline int get_random_bytes_wait(void *buf, size_t nbytes) { int ret = wait_for_random_bytes(); get_random_bytes(buf, nbytes); diff --git a/include/trace/events/random.h b/include/trace/events/random.h index ad149aeaf42c5..0609a2810a120 100644 --- a/include/trace/events/random.h +++ b/include/trace/events/random.h @@ -9,13 +9,13 @@ #include TRACE_EVENT(add_device_randomness, - TP_PROTO(int bytes, unsigned long IP), + TP_PROTO(size_t bytes, unsigned long IP), TP_ARGS(bytes, IP), TP_STRUCT__entry( - __field( int, bytes ) - __field(unsigned long, IP ) + __field(size_t, bytes ) + __field(unsigned long, IP ) ), TP_fast_assign( @@ -23,18 +23,18 @@ TRACE_EVENT(add_device_randomness, __entry->IP = IP; ), - TP_printk("bytes %d caller %pS", + TP_printk("bytes %zu caller %pS", __entry->bytes, (void *)__entry->IP) ); DECLARE_EVENT_CLASS(random__mix_pool_bytes, - TP_PROTO(int bytes, unsigned long IP), + TP_PROTO(size_t bytes, unsigned long IP), TP_ARGS(bytes, IP), TP_STRUCT__entry( - __field( int, bytes ) - __field(unsigned long, IP ) + __field(size_t, bytes ) + __field(unsigned long, IP ) ), TP_fast_assign( @@ -42,12 +42,12 @@ DECLARE_EVENT_CLASS(random__mix_pool_bytes, __entry->IP = IP; ), - TP_printk("input pool: bytes %d caller %pS", + TP_printk("input pool: bytes %zu caller %pS", __entry->bytes, (void *)__entry->IP) ); DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes, - TP_PROTO(int bytes, unsigned long IP), + 
TP_PROTO(size_t bytes, unsigned long IP), TP_ARGS(bytes, IP) ); @@ -59,13 +59,13 @@ DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes_nolock, ); TRACE_EVENT(credit_entropy_bits, - TP_PROTO(int bits, int entropy_count, unsigned long IP), + TP_PROTO(size_t bits, size_t entropy_count, unsigned long IP), TP_ARGS(bits, entropy_count, IP), TP_STRUCT__entry( - __field( int, bits ) - __field( int, entropy_count ) + __field(size_t, bits ) + __field(size_t, entropy_count ) __field(unsigned long, IP ) ), @@ -75,34 +75,34 @@ TRACE_EVENT(credit_entropy_bits, __entry->IP = IP; ), - TP_printk("input pool: bits %d entropy_count %d caller %pS", + TP_printk("input pool: bits %zu entropy_count %zu caller %pS", __entry->bits, __entry->entropy_count, (void *)__entry->IP) ); TRACE_EVENT(add_input_randomness, - TP_PROTO(int input_bits), + TP_PROTO(size_t input_bits), TP_ARGS(input_bits), TP_STRUCT__entry( - __field( int, input_bits ) + __field(size_t, input_bits ) ), TP_fast_assign( __entry->input_bits = input_bits; ), - TP_printk("input_pool_bits %d", __entry->input_bits) + TP_printk("input_pool_bits %zu", __entry->input_bits) ); TRACE_EVENT(add_disk_randomness, - TP_PROTO(dev_t dev, int input_bits), + TP_PROTO(dev_t dev, size_t input_bits), TP_ARGS(dev, input_bits), TP_STRUCT__entry( - __field( dev_t, dev ) - __field( int, input_bits ) + __field(dev_t, dev ) + __field(size_t, input_bits ) ), TP_fast_assign( @@ -110,17 +110,17 @@ TRACE_EVENT(add_disk_randomness, __entry->input_bits = input_bits; ), - TP_printk("dev %d,%d input_pool_bits %d", MAJOR(__entry->dev), + TP_printk("dev %d,%d input_pool_bits %zu", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->input_bits) ); DECLARE_EVENT_CLASS(random__get_random_bytes, - TP_PROTO(int nbytes, unsigned long IP), + TP_PROTO(size_t nbytes, unsigned long IP), TP_ARGS(nbytes, IP), TP_STRUCT__entry( - __field( int, nbytes ) + __field(size_t, nbytes ) __field(unsigned long, IP ) ), @@ -129,29 +129,29 @@ 
DECLARE_EVENT_CLASS(random__get_random_bytes, __entry->IP = IP; ), - TP_printk("nbytes %d caller %pS", __entry->nbytes, (void *)__entry->IP) + TP_printk("nbytes %zu caller %pS", __entry->nbytes, (void *)__entry->IP) ); DEFINE_EVENT(random__get_random_bytes, get_random_bytes, - TP_PROTO(int nbytes, unsigned long IP), + TP_PROTO(size_t nbytes, unsigned long IP), TP_ARGS(nbytes, IP) ); DEFINE_EVENT(random__get_random_bytes, get_random_bytes_arch, - TP_PROTO(int nbytes, unsigned long IP), + TP_PROTO(size_t nbytes, unsigned long IP), TP_ARGS(nbytes, IP) ); DECLARE_EVENT_CLASS(random__extract_entropy, - TP_PROTO(int nbytes, int entropy_count), + TP_PROTO(size_t nbytes, size_t entropy_count), TP_ARGS(nbytes, entropy_count), TP_STRUCT__entry( - __field( int, nbytes ) - __field( int, entropy_count ) + __field( size_t, nbytes ) + __field( size_t, entropy_count ) ), TP_fast_assign( @@ -159,37 +159,34 @@ DECLARE_EVENT_CLASS(random__extract_entropy, __entry->entropy_count = entropy_count; ), - TP_printk("input pool: nbytes %d entropy_count %d", + TP_printk("input pool: nbytes %zu entropy_count %zu", __entry->nbytes, __entry->entropy_count) ); DEFINE_EVENT(random__extract_entropy, extract_entropy, - TP_PROTO(int nbytes, int entropy_count), + TP_PROTO(size_t nbytes, size_t entropy_count), TP_ARGS(nbytes, entropy_count) ); TRACE_EVENT(urandom_read, - TP_PROTO(int got_bits, int pool_left, int input_left), + TP_PROTO(size_t nbytes, size_t entropy_count), - TP_ARGS(got_bits, pool_left, input_left), + TP_ARGS(nbytes, entropy_count), TP_STRUCT__entry( - __field( int, got_bits ) - __field( int, pool_left ) - __field( int, input_left ) + __field( size_t, nbytes ) + __field( size_t, entropy_count ) ), TP_fast_assign( - __entry->got_bits = got_bits; - __entry->pool_left = pool_left; - __entry->input_left = input_left; + __entry->nbytes = nbytes; + __entry->entropy_count = entropy_count; ), - TP_printk("got_bits %d nonblocking_pool_entropy_left %d " - "input_entropy_left %d", 
__entry->got_bits, - __entry->pool_left, __entry->input_left) + TP_printk("reading: nbytes %zu entropy_count %zu", + __entry->nbytes, __entry->entropy_count) ); TRACE_EVENT(prandom_u32, -- GitLab From 434537ae54ad37e93555de21b6ac8133d6d773a9 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 7 Feb 2022 23:37:13 +0100 Subject: [PATCH 0541/1586] random: remove outdated INT_MAX >> 6 check in urandom_read() In 79a8468747c5 ("random: check for increase of entropy_count because of signed conversion"), a number of checks were added around what values were passed to account(), because account() was doing fancy fixed point fractional arithmetic, and a user had some ability to pass large values directly into it. One of things in that commit was limiting those values to INT_MAX >> 6. The first >> 3 was for bytes to bits, and the next >> 3 was for bits to 1/8 fractional bits. However, for several years now, urandom reads no longer touch entropy accounting, and so this check serves no purpose. The current flow is: urandom_read_nowarn()-->get_random_bytes_user()-->chacha20_block() Of course, we don't want that size_t to be truncated when adding it into the ssize_t. But we arrive at urandom_read_nowarn() in the first place either via ordinary fops, which limits reads to MAX_RW_COUNT, or via getrandom() which limits reads to INT_MAX. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Reviewed-by: Jann Horn Reviewed-by: Eric Biggers Signed-off-by: Jason A. 
Donenfeld --- drivers/char/random.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 768dee5e081a5..896ec54f8f5cc 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1286,9 +1286,8 @@ void rand_initialize_disk(struct gendisk *disk) static ssize_t urandom_read_nowarn(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { - int ret; + ssize_t ret; - nbytes = min_t(size_t, nbytes, INT_MAX >> 6); ret = get_random_bytes_user(buf, nbytes); trace_urandom_read(nbytes, input_pool.entropy_count); return ret; -- GitLab From 7b5164fb1279bf0251371848e40bae646b59b3a8 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 9 Feb 2022 18:42:13 +0100 Subject: [PATCH 0542/1586] random: zero buffer after reading entropy from userspace This buffer may contain entropic data that shouldn't stick around longer than needed, so zero out the temporary buffer at the end of write_pool(). Reviewed-by: Dominik Brodowski Reviewed-by: Jann Horn Reviewed-by: Eric Biggers Signed-off-by: Jason A. 
Donenfeld --- drivers/char/random.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 896ec54f8f5cc..8ae7d0b30280c 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1336,19 +1336,24 @@ static __poll_t random_poll(struct file *file, poll_table *wait) static int write_pool(const char __user *ubuf, size_t count) { size_t len; + int ret = 0; u8 block[BLAKE2S_BLOCK_SIZE]; while (count) { len = min(count, sizeof(block)); - if (copy_from_user(block, ubuf, len)) - return -EFAULT; + if (copy_from_user(block, ubuf, len)) { + ret = -EFAULT; + goto out; + } count -= len; ubuf += len; mix_pool_bytes(block, len); cond_resched(); } - return 0; +out: + memzero_explicit(block, sizeof(block)); + return ret; } static ssize_t random_write(struct file *file, const char __user *buffer, -- GitLab From 7191c628fe07b70d3f37de736d173d1b115396ed Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Wed, 9 Feb 2022 19:57:06 +0100 Subject: [PATCH 0543/1586] random: fix locking for crng_init in crng_reseed() crng_init is protected by primary_crng->lock. Therefore, we need to hold this lock when increasing crng_init to 2. As we shouldn't hold this lock for too long, only hold it for those parts which require protection. Signed-off-by: Dominik Brodowski Reviewed-by: Eric Biggers Signed-off-by: Jason A. 
Donenfeld --- drivers/char/random.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 8ae7d0b30280c..1ccd92efa8f41 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -502,6 +502,7 @@ static void crng_reseed(void) int entropy_count; unsigned long next_gen; u8 key[CHACHA_KEY_SIZE]; + bool finalize_init = false; /* * First we make sure we have POOL_MIN_BITS of entropy in the pool, @@ -529,12 +530,14 @@ static void crng_reseed(void) ++next_gen; WRITE_ONCE(base_crng.generation, next_gen); WRITE_ONCE(base_crng.birth, jiffies); - spin_unlock_irqrestore(&base_crng.lock, flags); - memzero_explicit(key, sizeof(key)); - if (crng_init < 2) { invalidate_batched_entropy(); crng_init = 2; + finalize_init = true; + } + spin_unlock_irqrestore(&base_crng.lock, flags); + memzero_explicit(key, sizeof(key)); + if (finalize_init) { process_random_ready_list(); wake_up_interruptible(&crng_init_wait); kill_fasync(&fasync, SIGIO, POLL_IN); -- GitLab From 0791e8b655cc373718f0f58800fdc625a3447ac5 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 9 Feb 2022 22:46:48 +0100 Subject: [PATCH 0544/1586] random: tie batched entropy generation to base_crng generation Now that we have an explicit base_crng generation counter, we don't need a separate one for batched entropy. Rather, we can just move the generation forward every time we change crng_init state or update the base_crng key. Cc: Theodore Ts'o Reviewed-by: Eric Biggers Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. 
Donenfeld --- drivers/char/random.c | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 1ccd92efa8f41..c27ebf7073809 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -430,8 +430,6 @@ static DEFINE_PER_CPU(struct crng, crngs) = { static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait); -static void invalidate_batched_entropy(void); - /* * crng_fast_load() can be called by code in the interrupt service * path. So we can't afford to dilly-dally. Returns the number of @@ -454,7 +452,7 @@ static size_t crng_fast_load(const void *cp, size_t len) src++; crng_init_cnt++; len--; ret++; } if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) { - invalidate_batched_entropy(); + ++base_crng.generation; crng_init = 1; } spin_unlock_irqrestore(&base_crng.lock, flags); @@ -531,7 +529,6 @@ static void crng_reseed(void) WRITE_ONCE(base_crng.generation, next_gen); WRITE_ONCE(base_crng.birth, jiffies); if (crng_init < 2) { - invalidate_batched_entropy(); crng_init = 2; finalize_init = true; } @@ -1256,8 +1253,9 @@ int __init rand_initialize(void) mix_pool_bytes(utsname(), sizeof(*(utsname()))); extract_entropy(base_crng.key, sizeof(base_crng.key)); + ++base_crng.generation; + if (arch_init && trust_cpu && crng_init < 2) { - invalidate_batched_entropy(); crng_init = 2; pr_notice("crng init done (trusting CPU's manufacturer)\n"); } @@ -1607,8 +1605,6 @@ static int __init random_sysctls_init(void) device_initcall(random_sysctls_init); #endif /* CONFIG_SYSCTL */ -static atomic_t batch_generation = ATOMIC_INIT(0); - struct batched_entropy { union { /* @@ -1622,8 +1618,8 @@ struct batched_entropy { u32 entropy_u32[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(u32))]; }; local_lock_t lock; + unsigned long generation; unsigned int position; - int generation; }; /* @@ -1643,14 +1639,14 @@ u64 get_random_u64(void) unsigned long flags; struct batched_entropy *batch; static void *previous; - int next_gen; + 
unsigned long next_gen; warn_unseeded_randomness(&previous); local_lock_irqsave(&batched_entropy_u64.lock, flags); batch = raw_cpu_ptr(&batched_entropy_u64); - next_gen = atomic_read(&batch_generation); + next_gen = READ_ONCE(base_crng.generation); if (batch->position >= ARRAY_SIZE(batch->entropy_u64) || next_gen != batch->generation) { _get_random_bytes(batch->entropy_u64, sizeof(batch->entropy_u64)); @@ -1677,14 +1673,14 @@ u32 get_random_u32(void) unsigned long flags; struct batched_entropy *batch; static void *previous; - int next_gen; + unsigned long next_gen; warn_unseeded_randomness(&previous); local_lock_irqsave(&batched_entropy_u32.lock, flags); batch = raw_cpu_ptr(&batched_entropy_u32); - next_gen = atomic_read(&batch_generation); + next_gen = READ_ONCE(base_crng.generation); if (batch->position >= ARRAY_SIZE(batch->entropy_u32) || next_gen != batch->generation) { _get_random_bytes(batch->entropy_u32, sizeof(batch->entropy_u32)); @@ -1700,15 +1696,6 @@ u32 get_random_u32(void) } EXPORT_SYMBOL(get_random_u32); -/* It's important to invalidate all potential batched entropy that might - * be stored before the crng is initialized, which we can do lazily by - * bumping the generation counter. - */ -static void invalidate_batched_entropy(void) -{ - atomic_inc(&batch_generation); -} - /** * randomize_page - Generate a random, page aligned address * @start: The smallest acceptable address the caller will take. -- GitLab From 95e6060c20a7f5db60163274c5222a725ac118f9 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 10 Feb 2022 16:35:24 +0100 Subject: [PATCH 0545/1586] random: remove ifdef'd out interrupt bench With tools like kbench9000 giving more finegrained responses, and this basically never having been used ever since it was initially added, let's just get rid of this. There *is* still work to be done on the interrupt handler, but this really isn't the way it's being developed. 
Cc: Theodore Ts'o Reviewed-by: Eric Biggers Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- Documentation/admin-guide/sysctl/kernel.rst | 9 ----- drivers/char/random.c | 40 --------------------- 2 files changed, 49 deletions(-) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index d3c6d9a501a9c..5dd660aac0aee 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -1041,15 +1041,6 @@ This is a directory, with the following entries: are woken up. This file is writable for compatibility purposes, but writing to it has no effect on any RNG behavior. -If ``drivers/char/random.c`` is built with ``ADD_INTERRUPT_BENCH`` -defined, these additional entries are present: - -* ``add_interrupt_avg_cycles``: the average number of cycles between - interrupts used to feed the pool; - -* ``add_interrupt_avg_deviation``: the standard deviation seen on the - number of cycles between interrupts used to feed the pool. - randomize_va_space ================== diff --git a/drivers/char/random.c b/drivers/char/random.c index c27ebf7073809..35c440a0d83cf 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -240,8 +240,6 @@ #define CREATE_TRACE_POINTS #include -/* #define ADD_INTERRUPT_BENCH */ - enum { POOL_BITS = BLAKE2S_HASH_SIZE * 8, POOL_MIN_BITS = POOL_BITS /* No point in settling for less. 
*/ @@ -808,27 +806,6 @@ EXPORT_SYMBOL_GPL(add_input_randomness); static DEFINE_PER_CPU(struct fast_pool, irq_randomness); -#ifdef ADD_INTERRUPT_BENCH -static unsigned long avg_cycles, avg_deviation; - -#define AVG_SHIFT 8 /* Exponential average factor k=1/256 */ -#define FIXED_1_2 (1 << (AVG_SHIFT - 1)) - -static void add_interrupt_bench(cycles_t start) -{ - long delta = random_get_entropy() - start; - - /* Use a weighted moving average */ - delta = delta - ((avg_cycles + FIXED_1_2) >> AVG_SHIFT); - avg_cycles += delta; - /* And average deviation */ - delta = abs(delta) - ((avg_deviation + FIXED_1_2) >> AVG_SHIFT); - avg_deviation += delta; -} -#else -#define add_interrupt_bench(x) -#endif - static u32 get_reg(struct fast_pool *f, struct pt_regs *regs) { u32 *ptr = (u32 *)regs; @@ -865,7 +842,6 @@ void add_interrupt_randomness(int irq) (sizeof(ip) > 4) ? ip >> 32 : get_reg(fast_pool, regs); fast_mix(fast_pool); - add_interrupt_bench(cycles); if (unlikely(crng_init == 0)) { if (fast_pool->count >= 64 && @@ -1574,22 +1550,6 @@ static struct ctl_table random_table[] = { .mode = 0444, .proc_handler = proc_do_uuid, }, -#ifdef ADD_INTERRUPT_BENCH - { - .procname = "add_interrupt_avg_cycles", - .data = &avg_cycles, - .maxlen = sizeof(avg_cycles), - .mode = 0444, - .proc_handler = proc_doulongvec_minmax, - }, - { - .procname = "add_interrupt_avg_deviation", - .data = &avg_deviation, - .maxlen = sizeof(avg_deviation), - .mode = 0444, - .proc_handler = proc_doulongvec_minmax, - }, -#endif { } }; -- GitLab From 14c174633f349cb41ea90c2c0aaddac157012f74 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 10 Feb 2022 16:40:44 +0100 Subject: [PATCH 0546/1586] random: remove unused tracepoints These explicit tracepoints aren't really used and show signs of aging. It's work to keep these up to date, and before I attempted to keep them up to date, they weren't up to date, which indicates that they're not really used.
These days there are better ways of introspecting anyway. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 30 +---- include/trace/events/random.h | 212 ---------------------------------- lib/random32.c | 2 - 3 files changed, 3 insertions(+), 241 deletions(-) delete mode 100644 include/trace/events/random.h diff --git a/drivers/char/random.c b/drivers/char/random.c index 35c440a0d83cf..055c42246b688 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -237,9 +237,6 @@ #include #include -#define CREATE_TRACE_POINTS -#include - enum { POOL_BITS = BLAKE2S_HASH_SIZE * 8, POOL_MIN_BITS = POOL_BITS /* No point in settling for less. */ @@ -315,7 +312,6 @@ static void mix_pool_bytes(const void *in, size_t nbytes) { unsigned long flags; - trace_mix_pool_bytes(nbytes, _RET_IP_); spin_lock_irqsave(&input_pool.lock, flags); _mix_pool_bytes(in, nbytes); spin_unlock_irqrestore(&input_pool.lock, flags); @@ -389,8 +385,6 @@ static void credit_entropy_bits(size_t nbits) entropy_count = min_t(unsigned int, POOL_BITS, orig + add); } while (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig); - trace_credit_entropy_bits(nbits, entropy_count, _RET_IP_); - if (crng_init < 2 && entropy_count >= POOL_MIN_BITS) crng_reseed(); } @@ -721,7 +715,6 @@ void add_device_randomness(const void *buf, size_t size) if (!crng_ready() && size) crng_slow_load(buf, size); - trace_add_device_randomness(size, _RET_IP_); spin_lock_irqsave(&input_pool.lock, flags); _mix_pool_bytes(buf, size); _mix_pool_bytes(&time, sizeof(time)); @@ -800,7 +793,6 @@ void add_input_randomness(unsigned int type, unsigned int code, last_value = value; add_timer_randomness(&input_timer_state, (type << 4) ^ code ^ (code >> 4) ^ value); - trace_add_input_randomness(input_pool.entropy_count); } EXPORT_SYMBOL_GPL(add_input_randomness); @@ -880,7 +872,6 @@ void add_disk_randomness(struct gendisk *disk) return; /* first 
major is 1, so we get >= 0x200 here */ add_timer_randomness(disk->random, 0x100 + disk_devt(disk)); - trace_add_disk_randomness(disk_devt(disk), input_pool.entropy_count); } EXPORT_SYMBOL_GPL(add_disk_randomness); #endif @@ -905,8 +896,6 @@ static void extract_entropy(void *buf, size_t nbytes) } block; size_t i; - trace_extract_entropy(nbytes, input_pool.entropy_count); - for (i = 0; i < ARRAY_SIZE(block.rdseed); ++i) { if (!arch_get_random_seed_long(&block.rdseed[i]) && !arch_get_random_long(&block.rdseed[i])) @@ -978,8 +967,6 @@ static void _get_random_bytes(void *buf, size_t nbytes) u8 tmp[CHACHA_BLOCK_SIZE]; size_t len; - trace_get_random_bytes(nbytes, _RET_IP_); - if (!nbytes) return; @@ -1176,7 +1163,6 @@ size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes) size_t left = nbytes; u8 *p = buf; - trace_get_random_bytes_arch(left, _RET_IP_); while (left) { unsigned long v; size_t chunk = min_t(size_t, left, sizeof(unsigned long)); @@ -1260,16 +1246,6 @@ void rand_initialize_disk(struct gendisk *disk) } #endif -static ssize_t urandom_read_nowarn(struct file *file, char __user *buf, - size_t nbytes, loff_t *ppos) -{ - ssize_t ret; - - ret = get_random_bytes_user(buf, nbytes); - trace_urandom_read(nbytes, input_pool.entropy_count); - return ret; -} - static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { @@ -1282,7 +1258,7 @@ static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes, current->comm, nbytes); } - return urandom_read_nowarn(file, buf, nbytes, ppos); + return get_random_bytes_user(buf, nbytes); } static ssize_t random_read(struct file *file, char __user *buf, size_t nbytes, @@ -1293,7 +1269,7 @@ static ssize_t random_read(struct file *file, char __user *buf, size_t nbytes, ret = wait_for_random_bytes(); if (ret != 0) return ret; - return urandom_read_nowarn(file, buf, nbytes, ppos); + return get_random_bytes_user(buf, nbytes); } static __poll_t random_poll(struct file *file, 
poll_table *wait) @@ -1454,7 +1430,7 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int, if (unlikely(ret)) return ret; } - return urandom_read_nowarn(NULL, buf, count, NULL); + return get_random_bytes_user(buf, count); } /******************************************************************** diff --git a/include/trace/events/random.h b/include/trace/events/random.h deleted file mode 100644 index 0609a2810a120..0000000000000 --- a/include/trace/events/random.h +++ /dev/null @@ -1,212 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM random - -#if !defined(_TRACE_RANDOM_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_RANDOM_H - -#include -#include - -TRACE_EVENT(add_device_randomness, - TP_PROTO(size_t bytes, unsigned long IP), - - TP_ARGS(bytes, IP), - - TP_STRUCT__entry( - __field(size_t, bytes ) - __field(unsigned long, IP ) - ), - - TP_fast_assign( - __entry->bytes = bytes; - __entry->IP = IP; - ), - - TP_printk("bytes %zu caller %pS", - __entry->bytes, (void *)__entry->IP) -); - -DECLARE_EVENT_CLASS(random__mix_pool_bytes, - TP_PROTO(size_t bytes, unsigned long IP), - - TP_ARGS(bytes, IP), - - TP_STRUCT__entry( - __field(size_t, bytes ) - __field(unsigned long, IP ) - ), - - TP_fast_assign( - __entry->bytes = bytes; - __entry->IP = IP; - ), - - TP_printk("input pool: bytes %zu caller %pS", - __entry->bytes, (void *)__entry->IP) -); - -DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes, - TP_PROTO(size_t bytes, unsigned long IP), - - TP_ARGS(bytes, IP) -); - -DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes_nolock, - TP_PROTO(int bytes, unsigned long IP), - - TP_ARGS(bytes, IP) -); - -TRACE_EVENT(credit_entropy_bits, - TP_PROTO(size_t bits, size_t entropy_count, unsigned long IP), - - TP_ARGS(bits, entropy_count, IP), - - TP_STRUCT__entry( - __field(size_t, bits ) - __field(size_t, entropy_count ) - __field(unsigned long, IP ) - ), - - TP_fast_assign( - __entry->bits = bits; - 
__entry->entropy_count = entropy_count; - __entry->IP = IP; - ), - - TP_printk("input pool: bits %zu entropy_count %zu caller %pS", - __entry->bits, __entry->entropy_count, (void *)__entry->IP) -); - -TRACE_EVENT(add_input_randomness, - TP_PROTO(size_t input_bits), - - TP_ARGS(input_bits), - - TP_STRUCT__entry( - __field(size_t, input_bits ) - ), - - TP_fast_assign( - __entry->input_bits = input_bits; - ), - - TP_printk("input_pool_bits %zu", __entry->input_bits) -); - -TRACE_EVENT(add_disk_randomness, - TP_PROTO(dev_t dev, size_t input_bits), - - TP_ARGS(dev, input_bits), - - TP_STRUCT__entry( - __field(dev_t, dev ) - __field(size_t, input_bits ) - ), - - TP_fast_assign( - __entry->dev = dev; - __entry->input_bits = input_bits; - ), - - TP_printk("dev %d,%d input_pool_bits %zu", MAJOR(__entry->dev), - MINOR(__entry->dev), __entry->input_bits) -); - -DECLARE_EVENT_CLASS(random__get_random_bytes, - TP_PROTO(size_t nbytes, unsigned long IP), - - TP_ARGS(nbytes, IP), - - TP_STRUCT__entry( - __field(size_t, nbytes ) - __field(unsigned long, IP ) - ), - - TP_fast_assign( - __entry->nbytes = nbytes; - __entry->IP = IP; - ), - - TP_printk("nbytes %zu caller %pS", __entry->nbytes, (void *)__entry->IP) -); - -DEFINE_EVENT(random__get_random_bytes, get_random_bytes, - TP_PROTO(size_t nbytes, unsigned long IP), - - TP_ARGS(nbytes, IP) -); - -DEFINE_EVENT(random__get_random_bytes, get_random_bytes_arch, - TP_PROTO(size_t nbytes, unsigned long IP), - - TP_ARGS(nbytes, IP) -); - -DECLARE_EVENT_CLASS(random__extract_entropy, - TP_PROTO(size_t nbytes, size_t entropy_count), - - TP_ARGS(nbytes, entropy_count), - - TP_STRUCT__entry( - __field( size_t, nbytes ) - __field( size_t, entropy_count ) - ), - - TP_fast_assign( - __entry->nbytes = nbytes; - __entry->entropy_count = entropy_count; - ), - - TP_printk("input pool: nbytes %zu entropy_count %zu", - __entry->nbytes, __entry->entropy_count) -); - - -DEFINE_EVENT(random__extract_entropy, extract_entropy, - TP_PROTO(size_t nbytes, 
size_t entropy_count), - - TP_ARGS(nbytes, entropy_count) -); - -TRACE_EVENT(urandom_read, - TP_PROTO(size_t nbytes, size_t entropy_count), - - TP_ARGS(nbytes, entropy_count), - - TP_STRUCT__entry( - __field( size_t, nbytes ) - __field( size_t, entropy_count ) - ), - - TP_fast_assign( - __entry->nbytes = nbytes; - __entry->entropy_count = entropy_count; - ), - - TP_printk("reading: nbytes %zu entropy_count %zu", - __entry->nbytes, __entry->entropy_count) -); - -TRACE_EVENT(prandom_u32, - - TP_PROTO(unsigned int ret), - - TP_ARGS(ret), - - TP_STRUCT__entry( - __field( unsigned int, ret) - ), - - TP_fast_assign( - __entry->ret = ret; - ), - - TP_printk("ret=%u" , __entry->ret) -); - -#endif /* _TRACE_RANDOM_H */ - -/* This part must be outside protection */ -#include diff --git a/lib/random32.c b/lib/random32.c index a57a0e18819d0..3c19820796d04 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -41,7 +41,6 @@ #include #include #include -#include /** * prandom_u32_state - seeded pseudo-random number generator. @@ -387,7 +386,6 @@ u32 prandom_u32(void) struct siprand_state *state = get_cpu_ptr(&net_rand_state); u32 res = siprand_u32(state); - trace_prandom_u32(res); put_cpu_ptr(&net_rand_state); return res; } -- GitLab From a07fdae346c35c6ba286af1c88e0effcfa330bf9 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 10 Feb 2022 16:43:57 +0100 Subject: [PATCH 0547/1586] random: add proper SPDX header Convert the current license into the SPDX notation of "(GPL-2.0 OR BSD-3-Clause)". This infers GPL-2.0 from the text "ALTERNATIVELY, this product may be distributed under the terms of the GNU General Public License, in which case the provisions of the GPL are required INSTEAD OF the above restrictions" and it infers BSD-3-Clause from the verbatim BSD 3 clause license in the file. Cc: Thomas Gleixner Cc: Theodore Ts'o Cc: Dominik Brodowski Reviewed-by: Greg Kroah-Hartman Signed-off-by: Jason A. 
Donenfeld --- drivers/char/random.c | 37 +------------------------------------ 1 file changed, 1 insertion(+), 36 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 055c42246b688..5373fae0aa632 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1,44 +1,9 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) /* - * random.c -- A strong random number generator - * * Copyright (C) 2017-2022 Jason A. Donenfeld . All Rights Reserved. - * * Copyright Matt Mackall , 2003, 2004, 2005 - * * Copyright Theodore Ts'o, 1994, 1995, 1996, 1997, 1998, 1999. All * rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, and the entire permission notice in its entirety, - * including the disclaimer of warranties. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the author may not be used to endorse or promote - * products derived from this software without specific prior - * written permission. - * - * ALTERNATIVELY, this product may be distributed under the terms of - * the GNU General Public License, in which case the provisions of the GPL are - * required INSTEAD OF the above restrictions. (This clause is - * necessary due to a potential bad interaction between the GPL and - * the restrictions contained in a BSD-style copyright.) - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF - * WHICH ARE HEREBY DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT - * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE - * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH - * DAMAGE. */ /* -- GitLab From b2f408fe403800c91a49f6589d95b6759ce1b30b Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 10 Feb 2022 17:01:27 +0100 Subject: [PATCH 0548/1586] random: deobfuscate irq u32/u64 contributions In the irq handler, we fill out 16 bytes differently on 32-bit and 64-bit platforms, and for 32-bit vs 64-bit cycle counters, which doesn't always correspond with the bitness of the platform. Whether or not you like this strangeness, it is a matter of fact. But it might not be a fact you well realized until now, because the code that loaded the irq info into 4 32-bit words was quite confusing. Instead, this commit makes everything explicit by having separate (compile-time) branches for 32-bit and 64-bit types. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 49 ++++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 5373fae0aa632..e7571de3d91a1 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -283,7 +283,10 @@ static void mix_pool_bytes(const void *in, size_t nbytes) } struct fast_pool { - u32 pool[4]; + union { + u32 pool32[4]; + u64 pool64[2]; + }; unsigned long last; u16 reg_idx; u8 count; @@ -294,10 +297,10 @@ struct fast_pool { * collector. 
It's hardcoded for an 128 bit pool and assumes that any * locks that might be needed are taken by the caller. */ -static void fast_mix(struct fast_pool *f) +static void fast_mix(u32 pool[4]) { - u32 a = f->pool[0], b = f->pool[1]; - u32 c = f->pool[2], d = f->pool[3]; + u32 a = pool[0], b = pool[1]; + u32 c = pool[2], d = pool[3]; a += b; c += d; b = rol32(b, 6); d = rol32(d, 27); @@ -315,9 +318,8 @@ static void fast_mix(struct fast_pool *f) b = rol32(b, 16); d = rol32(d, 14); d ^= a; b ^= c; - f->pool[0] = a; f->pool[1] = b; - f->pool[2] = c; f->pool[3] = d; - f->count++; + pool[0] = a; pool[1] = b; + pool[2] = c; pool[3] = d; } static void process_random_ready_list(void) @@ -784,29 +786,34 @@ void add_interrupt_randomness(int irq) struct pt_regs *regs = get_irq_regs(); unsigned long now = jiffies; cycles_t cycles = random_get_entropy(); - u32 c_high, j_high; - u64 ip; if (cycles == 0) cycles = get_reg(fast_pool, regs); - c_high = (sizeof(cycles) > 4) ? cycles >> 32 : 0; - j_high = (sizeof(now) > 4) ? now >> 32 : 0; - fast_pool->pool[0] ^= cycles ^ j_high ^ irq; - fast_pool->pool[1] ^= now ^ c_high; - ip = regs ? instruction_pointer(regs) : _RET_IP_; - fast_pool->pool[2] ^= ip; - fast_pool->pool[3] ^= - (sizeof(ip) > 4) ? ip >> 32 : get_reg(fast_pool, regs); - fast_mix(fast_pool); + if (sizeof(cycles) == 8) + fast_pool->pool64[0] ^= cycles ^ rol64(now, 32) ^ irq; + else { + fast_pool->pool32[0] ^= cycles ^ irq; + fast_pool->pool32[1] ^= now; + } + + if (sizeof(unsigned long) == 8) + fast_pool->pool64[1] ^= regs ? instruction_pointer(regs) : _RET_IP_; + else { + fast_pool->pool32[2] ^= regs ? 
instruction_pointer(regs) : _RET_IP_; + fast_pool->pool32[3] ^= get_reg(fast_pool, regs); + } + + fast_mix(fast_pool->pool32); + ++fast_pool->count; if (unlikely(crng_init == 0)) { if (fast_pool->count >= 64 && - crng_fast_load(fast_pool->pool, sizeof(fast_pool->pool)) > 0) { + crng_fast_load(fast_pool->pool32, sizeof(fast_pool->pool32)) > 0) { fast_pool->count = 0; fast_pool->last = now; if (spin_trylock(&input_pool.lock)) { - _mix_pool_bytes(&fast_pool->pool, sizeof(fast_pool->pool)); + _mix_pool_bytes(&fast_pool->pool32, sizeof(fast_pool->pool32)); spin_unlock(&input_pool.lock); } } @@ -820,7 +827,7 @@ void add_interrupt_randomness(int irq) return; fast_pool->last = now; - _mix_pool_bytes(&fast_pool->pool, sizeof(fast_pool->pool)); + _mix_pool_bytes(&fast_pool->pool32, sizeof(fast_pool->pool32)); spin_unlock(&input_pool.lock); fast_pool->count = 0; -- GitLab From 246c03dd899164d0186b6d685d6387f228c28d93 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 11 Feb 2022 12:19:49 +0100 Subject: [PATCH 0549/1586] random: introduce drain_entropy() helper to declutter crng_reseed() In preparation for separating responsibilities, break out the entropy count management part of crng_reseed() into its own function. No functional changes. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Reviewed-by: Eric Biggers Signed-off-by: Jason A. 
Donenfeld --- drivers/char/random.c | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index e7571de3d91a1..d9a31ac10daf5 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -260,6 +260,7 @@ static struct { }; static void extract_entropy(void *buf, size_t nbytes); +static bool drain_entropy(void *buf, size_t nbytes); static void crng_reseed(void); @@ -456,23 +457,13 @@ static void crng_slow_load(const void *cp, size_t len) static void crng_reseed(void) { unsigned long flags; - int entropy_count; unsigned long next_gen; u8 key[CHACHA_KEY_SIZE]; bool finalize_init = false; - /* - * First we make sure we have POOL_MIN_BITS of entropy in the pool, - * and then we drain all of it. Only then can we extract a new key. - */ - do { - entropy_count = READ_ONCE(input_pool.entropy_count); - if (entropy_count < POOL_MIN_BITS) - return; - } while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count); - extract_entropy(key, sizeof(key)); - wake_up_interruptible(&random_write_wait); - kill_fasync(&fasync, SIGIO, POLL_OUT); + /* Only reseed if we can, to prevent brute forcing a small amount of new bits. */ + if (!drain_entropy(key, sizeof(key))) + return; /* * We copy the new key into the base_crng, overwriting the old one, @@ -900,6 +891,25 @@ static void extract_entropy(void *buf, size_t nbytes) memzero_explicit(&block, sizeof(block)); } +/* + * First we make sure we have POOL_MIN_BITS of entropy in the pool, and then we + * set the entropy count to zero (but don't actually touch any data). Only then + * can we extract a new key with extract_entropy(). 
+ */ +static bool drain_entropy(void *buf, size_t nbytes) +{ + unsigned int entropy_count; + do { + entropy_count = READ_ONCE(input_pool.entropy_count); + if (entropy_count < POOL_MIN_BITS) + return false; + } while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count); + extract_entropy(buf, nbytes); + wake_up_interruptible(&random_write_wait); + kill_fasync(&fasync, SIGIO, POLL_OUT); + return true; +} + #define warn_unseeded_randomness(previous) \ _warn_unseeded_randomness(__func__, (void *)_RET_IP_, (previous)) -- GitLab From 6071a6c0fba2d747742cadcbb3ba26ed756ed73b Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 11 Feb 2022 12:28:33 +0100 Subject: [PATCH 0550/1586] random: remove useless header comment This really adds nothing at all useful. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld --- include/linux/random.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/include/linux/random.h b/include/linux/random.h index e92efb39779cc..37e1e8c43d7ee 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -1,9 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* - * include/linux/random.h - * - * Include file for the random number generator. - */ + #ifndef _LINUX_RANDOM_H #define _LINUX_RANDOM_H -- GitLab From 87e7d5abad0cbc9312dea7f889a57d294c1a5fcc Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 11 Feb 2022 13:41:41 +0100 Subject: [PATCH 0551/1586] random: remove whitespace and reorder includes This is purely cosmetic. Future work involves figuring out which of these headers we need and which we don't. Reviewed-by: Dominik Brodowski Reviewed-by: Eric Biggers Signed-off-by: Jason A. 
Donenfeld --- drivers/char/random.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index d9a31ac10daf5..ad112f9281820 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -193,11 +193,10 @@ #include #include #include +#include #include #include - #include -#include #include #include #include -- GitLab From 5f1bb112006b104b3e2a1e1b39bbb9b2617581e6 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 11 Feb 2022 12:53:34 +0100 Subject: [PATCH 0552/1586] random: group initialization wait functions This pulls all of the readiness waiting-focused functions into the first labeled section. No functional changes. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 333 ++++++++++++++++++++++-------------------- 1 file changed, 172 insertions(+), 161 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index ad112f9281820..dd2da0b123502 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -201,44 +201,197 @@ #include #include -enum { - POOL_BITS = BLAKE2S_HASH_SIZE * 8, - POOL_MIN_BITS = POOL_BITS /* No point in settling for less. */ -}; - -/* - * Static global variables - */ -static DECLARE_WAIT_QUEUE_HEAD(random_write_wait); -static struct fasync_struct *fasync; - -static DEFINE_SPINLOCK(random_ready_list_lock); -static LIST_HEAD(random_ready_list); +/********************************************************************* + * + * Initialization and readiness waiting. + * + * Much of the RNG infrastructure is devoted to various dependencies + * being able to wait until the RNG has collected enough entropy and + * is ready for safe consumption. 
+ * + *********************************************************************/ /* * crng_init = 0 --> Uninitialized * 1 --> Initialized * 2 --> Initialized from input_pool * - * crng_init is protected by primary_crng->lock, and only increases + * crng_init is protected by base_crng->lock, and only increases * its value (from 0->1->2). */ static int crng_init = 0; #define crng_ready() (likely(crng_init > 1)) -static int crng_init_cnt = 0; -static void process_random_ready_list(void); -static void _get_random_bytes(void *buf, size_t nbytes); +/* Various types of waiters for crng_init->2 transition. */ +static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait); +static struct fasync_struct *fasync; +static DEFINE_SPINLOCK(random_ready_list_lock); +static LIST_HEAD(random_ready_list); +/* Control how we warn userspace. */ static struct ratelimit_state unseeded_warning = RATELIMIT_STATE_INIT("warn_unseeded_randomness", HZ, 3); static struct ratelimit_state urandom_warning = RATELIMIT_STATE_INIT("warn_urandom_randomness", HZ, 3); - static int ratelimit_disable __read_mostly; - module_param_named(ratelimit_disable, ratelimit_disable, int, 0644); MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression"); +/* + * Returns whether or not the input pool has been seeded and thus guaranteed + * to supply cryptographically secure random numbers. This applies to: the + * /dev/urandom device, the get_random_bytes function, and the get_random_{u32, + * ,u64,int,long} family of functions. + * + * Returns: true if the input pool has been seeded. + * false if the input pool has not been seeded. + */ +bool rng_is_initialized(void) +{ + return crng_ready(); +} +EXPORT_SYMBOL(rng_is_initialized); + +/* Used by wait_for_random_bytes(), and considered an entropy collector, below. */ +static void try_to_generate_entropy(void); + +/* + * Wait for the input pool to be seeded and thus guaranteed to supply + * cryptographically secure random numbers. 
This applies to: the /dev/urandom + * device, the get_random_bytes function, and the get_random_{u32,u64,int,long} + * family of functions. Using any of these functions without first calling + * this function forfeits the guarantee of security. + * + * Returns: 0 if the input pool has been seeded. + * -ERESTARTSYS if the function was interrupted by a signal. + */ +int wait_for_random_bytes(void) +{ + if (likely(crng_ready())) + return 0; + + do { + int ret; + ret = wait_event_interruptible_timeout(crng_init_wait, crng_ready(), HZ); + if (ret) + return ret > 0 ? 0 : ret; + + try_to_generate_entropy(); + } while (!crng_ready()); + + return 0; +} +EXPORT_SYMBOL(wait_for_random_bytes); + +/* + * Add a callback function that will be invoked when the input + * pool is initialised. + * + * returns: 0 if callback is successfully added + * -EALREADY if pool is already initialised (callback not called) + * -ENOENT if module for callback is not alive + */ +int add_random_ready_callback(struct random_ready_callback *rdy) +{ + struct module *owner; + unsigned long flags; + int err = -EALREADY; + + if (crng_ready()) + return err; + + owner = rdy->owner; + if (!try_module_get(owner)) + return -ENOENT; + + spin_lock_irqsave(&random_ready_list_lock, flags); + if (crng_ready()) + goto out; + + owner = NULL; + + list_add(&rdy->list, &random_ready_list); + err = 0; + +out: + spin_unlock_irqrestore(&random_ready_list_lock, flags); + + module_put(owner); + + return err; +} +EXPORT_SYMBOL(add_random_ready_callback); + +/* + * Delete a previously registered readiness callback function. 
+ */ +void del_random_ready_callback(struct random_ready_callback *rdy) +{ + unsigned long flags; + struct module *owner = NULL; + + spin_lock_irqsave(&random_ready_list_lock, flags); + if (!list_empty(&rdy->list)) { + list_del_init(&rdy->list); + owner = rdy->owner; + } + spin_unlock_irqrestore(&random_ready_list_lock, flags); + + module_put(owner); +} +EXPORT_SYMBOL(del_random_ready_callback); + +static void process_random_ready_list(void) +{ + unsigned long flags; + struct random_ready_callback *rdy, *tmp; + + spin_lock_irqsave(&random_ready_list_lock, flags); + list_for_each_entry_safe(rdy, tmp, &random_ready_list, list) { + struct module *owner = rdy->owner; + + list_del_init(&rdy->list); + rdy->func(rdy); + module_put(owner); + } + spin_unlock_irqrestore(&random_ready_list_lock, flags); +} + +#define warn_unseeded_randomness(previous) \ + _warn_unseeded_randomness(__func__, (void *)_RET_IP_, (previous)) + +static void _warn_unseeded_randomness(const char *func_name, void *caller, void **previous) +{ +#ifdef CONFIG_WARN_ALL_UNSEEDED_RANDOM + const bool print_once = false; +#else + static bool print_once __read_mostly; +#endif + + if (print_once || crng_ready() || + (previous && (caller == READ_ONCE(*previous)))) + return; + WRITE_ONCE(*previous, caller); +#ifndef CONFIG_WARN_ALL_UNSEEDED_RANDOM + print_once = true; +#endif + if (__ratelimit(&unseeded_warning)) + printk_deferred(KERN_NOTICE "random: %s called from %pS with crng_init=%d\n", + func_name, caller, crng_init); +} + + +enum { + POOL_BITS = BLAKE2S_HASH_SIZE * 8, + POOL_MIN_BITS = POOL_BITS /* No point in settling for less. */ +}; + +/* + * Static global variables + */ +static DECLARE_WAIT_QUEUE_HEAD(random_write_wait); + +static int crng_init_cnt = 0; + /********************************************************************** * * OS independent entropy store. 
Here are the functions which handle @@ -322,22 +475,6 @@ static void fast_mix(u32 pool[4]) pool[2] = c; pool[3] = d; } -static void process_random_ready_list(void) -{ - unsigned long flags; - struct random_ready_callback *rdy, *tmp; - - spin_lock_irqsave(&random_ready_list_lock, flags); - list_for_each_entry_safe(rdy, tmp, &random_ready_list, list) { - struct module *owner = rdy->owner; - - list_del_init(&rdy->list); - rdy->func(rdy); - module_put(owner); - } - spin_unlock_irqrestore(&random_ready_list_lock, flags); -} - static void credit_entropy_bits(size_t nbits) { unsigned int entropy_count, orig, add; @@ -387,8 +524,6 @@ static DEFINE_PER_CPU(struct crng, crngs) = { .lock = INIT_LOCAL_LOCK(crngs.lock), }; -static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait); - /* * crng_fast_load() can be called by code in the interrupt service * path. So we can't afford to dilly-dally. Returns the number of @@ -909,29 +1044,6 @@ static bool drain_entropy(void *buf, size_t nbytes) return true; } -#define warn_unseeded_randomness(previous) \ - _warn_unseeded_randomness(__func__, (void *)_RET_IP_, (previous)) - -static void _warn_unseeded_randomness(const char *func_name, void *caller, void **previous) -{ -#ifdef CONFIG_WARN_ALL_UNSEEDED_RANDOM - const bool print_once = false; -#else - static bool print_once __read_mostly; -#endif - - if (print_once || crng_ready() || - (previous && (caller == READ_ONCE(*previous)))) - return; - WRITE_ONCE(*previous, caller); -#ifndef CONFIG_WARN_ALL_UNSEEDED_RANDOM - print_once = true; -#endif - if (__ratelimit(&unseeded_warning)) - printk_deferred(KERN_NOTICE "random: %s called from %pS with crng_init=%d\n", - func_name, caller, crng_init); -} - /* * This function is the exported kernel interface. 
It returns some * number of good random numbers, suitable for key generation, seeding @@ -1032,107 +1144,6 @@ static void try_to_generate_entropy(void) mix_pool_bytes(&stack.now, sizeof(stack.now)); } -/* - * Wait for the urandom pool to be seeded and thus guaranteed to supply - * cryptographically secure random numbers. This applies to: the /dev/urandom - * device, the get_random_bytes function, and the get_random_{u32,u64,int,long} - * family of functions. Using any of these functions without first calling - * this function forfeits the guarantee of security. - * - * Returns: 0 if the urandom pool has been seeded. - * -ERESTARTSYS if the function was interrupted by a signal. - */ -int wait_for_random_bytes(void) -{ - if (likely(crng_ready())) - return 0; - - do { - int ret; - ret = wait_event_interruptible_timeout(crng_init_wait, crng_ready(), HZ); - if (ret) - return ret > 0 ? 0 : ret; - - try_to_generate_entropy(); - } while (!crng_ready()); - - return 0; -} -EXPORT_SYMBOL(wait_for_random_bytes); - -/* - * Returns whether or not the urandom pool has been seeded and thus guaranteed - * to supply cryptographically secure random numbers. This applies to: the - * /dev/urandom device, the get_random_bytes function, and the get_random_{u32, - * ,u64,int,long} family of functions. - * - * Returns: true if the urandom pool has been seeded. - * false if the urandom pool has not been seeded. - */ -bool rng_is_initialized(void) -{ - return crng_ready(); -} -EXPORT_SYMBOL(rng_is_initialized); - -/* - * Add a callback function that will be invoked when the nonblocking - * pool is initialised. 
- * - * returns: 0 if callback is successfully added - * -EALREADY if pool is already initialised (callback not called) - * -ENOENT if module for callback is not alive - */ -int add_random_ready_callback(struct random_ready_callback *rdy) -{ - struct module *owner; - unsigned long flags; - int err = -EALREADY; - - if (crng_ready()) - return err; - - owner = rdy->owner; - if (!try_module_get(owner)) - return -ENOENT; - - spin_lock_irqsave(&random_ready_list_lock, flags); - if (crng_ready()) - goto out; - - owner = NULL; - - list_add(&rdy->list, &random_ready_list); - err = 0; - -out: - spin_unlock_irqrestore(&random_ready_list_lock, flags); - - module_put(owner); - - return err; -} -EXPORT_SYMBOL(add_random_ready_callback); - -/* - * Delete a previously registered readiness callback function. - */ -void del_random_ready_callback(struct random_ready_callback *rdy) -{ - unsigned long flags; - struct module *owner = NULL; - - spin_lock_irqsave(&random_ready_list_lock, flags); - if (!list_empty(&rdy->list)) { - list_del_init(&rdy->list); - owner = rdy->owner; - } - spin_unlock_irqrestore(&random_ready_list_lock, flags); - - module_put(owner); -} -EXPORT_SYMBOL(del_random_ready_callback); - /* * This function will use the architecture-specific hardware random * number generator if it is available. It is not recommended for -- GitLab From 3655adc7089da4f8ca74cec8fcef73ea5101430e Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 11 Feb 2022 12:53:34 +0100 Subject: [PATCH 0553/1586] random: group crng functions This pulls all of the crng-focused functions into the second labeled section. No functional changes. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Reviewed-by: Eric Biggers Signed-off-by: Jason A. 
Donenfeld --- drivers/char/random.c | 862 ++++++++++++++++++++++-------------------- 1 file changed, 445 insertions(+), 417 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index dd2da0b123502..99b55e76b0043 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -380,122 +380,27 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, void } -enum { - POOL_BITS = BLAKE2S_HASH_SIZE * 8, - POOL_MIN_BITS = POOL_BITS /* No point in settling for less. */ -}; - -/* - * Static global variables - */ -static DECLARE_WAIT_QUEUE_HEAD(random_write_wait); - -static int crng_init_cnt = 0; - -/********************************************************************** +/********************************************************************* * - * OS independent entropy store. Here are the functions which handle - * storing entropy in an entropy pool. + * Fast key erasure RNG, the "crng". * - **********************************************************************/ - -static struct { - struct blake2s_state hash; - spinlock_t lock; - unsigned int entropy_count; -} input_pool = { - .hash.h = { BLAKE2S_IV0 ^ (0x01010000 | BLAKE2S_HASH_SIZE), - BLAKE2S_IV1, BLAKE2S_IV2, BLAKE2S_IV3, BLAKE2S_IV4, - BLAKE2S_IV5, BLAKE2S_IV6, BLAKE2S_IV7 }, - .hash.outlen = BLAKE2S_HASH_SIZE, - .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock), -}; - -static void extract_entropy(void *buf, size_t nbytes); -static bool drain_entropy(void *buf, size_t nbytes); - -static void crng_reseed(void); - -/* - * This function adds bytes into the entropy "pool". It does not - * update the entropy estimate. The caller should call - * credit_entropy_bits if this is appropriate. 
- */ -static void _mix_pool_bytes(const void *in, size_t nbytes) -{ - blake2s_update(&input_pool.hash, in, nbytes); -} - -static void mix_pool_bytes(const void *in, size_t nbytes) -{ - unsigned long flags; - - spin_lock_irqsave(&input_pool.lock, flags); - _mix_pool_bytes(in, nbytes); - spin_unlock_irqrestore(&input_pool.lock, flags); -} - -struct fast_pool { - union { - u32 pool32[4]; - u64 pool64[2]; - }; - unsigned long last; - u16 reg_idx; - u8 count; -}; - -/* - * This is a fast mixing routine used by the interrupt randomness - * collector. It's hardcoded for an 128 bit pool and assumes that any - * locks that might be needed are taken by the caller. - */ -static void fast_mix(u32 pool[4]) -{ - u32 a = pool[0], b = pool[1]; - u32 c = pool[2], d = pool[3]; - - a += b; c += d; - b = rol32(b, 6); d = rol32(d, 27); - d ^= a; b ^= c; - - a += b; c += d; - b = rol32(b, 16); d = rol32(d, 14); - d ^= a; b ^= c; - - a += b; c += d; - b = rol32(b, 6); d = rol32(d, 27); - d ^= a; b ^= c; - - a += b; c += d; - b = rol32(b, 16); d = rol32(d, 14); - d ^= a; b ^= c; - - pool[0] = a; pool[1] = b; - pool[2] = c; pool[3] = d; -} - -static void credit_entropy_bits(size_t nbits) -{ - unsigned int entropy_count, orig, add; - - if (!nbits) - return; - - add = min_t(size_t, nbits, POOL_BITS); - - do { - orig = READ_ONCE(input_pool.entropy_count); - entropy_count = min_t(unsigned int, POOL_BITS, orig + add); - } while (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig); - - if (crng_init < 2 && entropy_count >= POOL_MIN_BITS) - crng_reseed(); -} - -/********************************************************************* + * These functions expand entropy from the entropy extractor into + * long streams for external consumption using the "fast key erasure" + * RNG described at . 
* - * CRNG using CHACHA20 + * There are a few exported interfaces for use by other drivers: + * + * void get_random_bytes(void *buf, size_t nbytes) + * u32 get_random_u32() + * u64 get_random_u64() + * unsigned int get_random_int() + * unsigned long get_random_long() + * + * These interfaces will return the requested number of random bytes + * into the given buffer or as a return value. This is equivalent to + * a read from /dev/urandom. The integer family of functions may be + * higher performance for one-off random integers, because they do a + * bit of buffering. * *********************************************************************/ @@ -524,70 +429,14 @@ static DEFINE_PER_CPU(struct crng, crngs) = { .lock = INIT_LOCAL_LOCK(crngs.lock), }; -/* - * crng_fast_load() can be called by code in the interrupt service - * path. So we can't afford to dilly-dally. Returns the number of - * bytes processed from cp. - */ -static size_t crng_fast_load(const void *cp, size_t len) -{ - unsigned long flags; - const u8 *src = (const u8 *)cp; - size_t ret = 0; - - if (!spin_trylock_irqsave(&base_crng.lock, flags)) - return 0; - if (crng_init != 0) { - spin_unlock_irqrestore(&base_crng.lock, flags); - return 0; - } - while (len > 0 && crng_init_cnt < CRNG_INIT_CNT_THRESH) { - base_crng.key[crng_init_cnt % sizeof(base_crng.key)] ^= *src; - src++; crng_init_cnt++; len--; ret++; - } - if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) { - ++base_crng.generation; - crng_init = 1; - } - spin_unlock_irqrestore(&base_crng.lock, flags); - if (crng_init == 1) - pr_notice("fast init done\n"); - return ret; -} +/* Used by crng_reseed() to extract a new seed from the input pool. */ +static bool drain_entropy(void *buf, size_t nbytes); /* - * crng_slow_load() is called by add_device_randomness, which has two - * attributes. 
(1) We can't trust the buffer passed to it is - * guaranteed to be unpredictable (so it might not have any entropy at - * all), and (2) it doesn't have the performance constraints of - * crng_fast_load(). - * - * So, we simply hash the contents in with the current key. Finally, - * we do *not* advance crng_init_cnt since buffer we may get may be - * something like a fixed DMI table (for example), which might very - * well be unique to the machine, but is otherwise unvarying. + * This extracts a new crng key from the input pool, but only if there is a + * sufficient amount of entropy available, in order to mitigate bruteforcing + * of newly added bits. */ -static void crng_slow_load(const void *cp, size_t len) -{ - unsigned long flags; - struct blake2s_state hash; - - blake2s_init(&hash, sizeof(base_crng.key)); - - if (!spin_trylock_irqsave(&base_crng.lock, flags)) - return; - if (crng_init != 0) { - spin_unlock_irqrestore(&base_crng.lock, flags); - return; - } - - blake2s_update(&hash, base_crng.key, sizeof(base_crng.key)); - blake2s_update(&hash, cp, len); - blake2s_final(&hash, base_crng.key); - - spin_unlock_irqrestore(&base_crng.lock, flags); -} - static void crng_reseed(void) { unsigned long flags; @@ -637,13 +486,11 @@ static void crng_reseed(void) } /* - * The general form here is based on a "fast key erasure RNG" from - * . It generates a ChaCha - * block using the provided key, and then immediately overwites that - * key with half the block. It returns the resultant ChaCha state to the - * user, along with the second half of the block containing 32 bytes of - * random data that may be used; random_data_len may not be greater than - * 32. + * This generates a ChaCha block using the provided key, and then + * immediately overwites that key with half the block. It returns + * the resultant ChaCha state to the user, along with the second + * half of the block containing 32 bytes of random data that may + * be used; random_data_len may not be greater than 32. 
*/ static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE], u32 chacha_state[CHACHA_STATE_WORDS], @@ -730,51 +577,433 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS], local_unlock_irqrestore(&crngs.lock, flags); } -static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) +/* + * This function is for crng_init == 0 only. + * + * crng_fast_load() can be called by code in the interrupt service + * path. So we can't afford to dilly-dally. Returns the number of + * bytes processed from cp. + */ +static size_t crng_fast_load(const void *cp, size_t len) +{ + static int crng_init_cnt = 0; + unsigned long flags; + const u8 *src = (const u8 *)cp; + size_t ret = 0; + + if (!spin_trylock_irqsave(&base_crng.lock, flags)) + return 0; + if (crng_init != 0) { + spin_unlock_irqrestore(&base_crng.lock, flags); + return 0; + } + while (len > 0 && crng_init_cnt < CRNG_INIT_CNT_THRESH) { + base_crng.key[crng_init_cnt % sizeof(base_crng.key)] ^= *src; + src++; crng_init_cnt++; len--; ret++; + } + if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) { + ++base_crng.generation; + crng_init = 1; + } + spin_unlock_irqrestore(&base_crng.lock, flags); + if (crng_init == 1) + pr_notice("fast init done\n"); + return ret; +} + +/* + * This function is for crng_init == 0 only. + * + * crng_slow_load() is called by add_device_randomness, which has two + * attributes. (1) We can't trust the buffer passed to it is + * guaranteed to be unpredictable (so it might not have any entropy at + * all), and (2) it doesn't have the performance constraints of + * crng_fast_load(). + * + * So, we simply hash the contents in with the current key. Finally, + * we do *not* advance crng_init_cnt since buffer we may get may be + * something like a fixed DMI table (for example), which might very + * well be unique to the machine, but is otherwise unvarying. 
+ */ +static void crng_slow_load(const void *cp, size_t len) +{ + unsigned long flags; + struct blake2s_state hash; + + blake2s_init(&hash, sizeof(base_crng.key)); + + if (!spin_trylock_irqsave(&base_crng.lock, flags)) + return; + if (crng_init != 0) { + spin_unlock_irqrestore(&base_crng.lock, flags); + return; + } + + blake2s_update(&hash, base_crng.key, sizeof(base_crng.key)); + blake2s_update(&hash, cp, len); + blake2s_final(&hash, base_crng.key); + + spin_unlock_irqrestore(&base_crng.lock, flags); +} + +static void _get_random_bytes(void *buf, size_t nbytes) { - bool large_request = nbytes > 256; - ssize_t ret = 0; - size_t len; u32 chacha_state[CHACHA_STATE_WORDS]; - u8 output[CHACHA_BLOCK_SIZE]; + u8 tmp[CHACHA_BLOCK_SIZE]; + size_t len; + + if (!nbytes) + return; + + len = min_t(size_t, 32, nbytes); + crng_make_state(chacha_state, buf, len); + nbytes -= len; + buf += len; + + while (nbytes) { + if (nbytes < CHACHA_BLOCK_SIZE) { + chacha20_block(chacha_state, tmp); + memcpy(buf, tmp, nbytes); + memzero_explicit(tmp, sizeof(tmp)); + break; + } + + chacha20_block(chacha_state, buf); + if (unlikely(chacha_state[12] == 0)) + ++chacha_state[13]; + nbytes -= CHACHA_BLOCK_SIZE; + buf += CHACHA_BLOCK_SIZE; + } + + memzero_explicit(chacha_state, sizeof(chacha_state)); +} + +/* + * This function is the exported kernel interface. It returns some + * number of good random numbers, suitable for key generation, seeding + * TCP sequence numbers, etc. It does not rely on the hardware random + * number generator. For random bytes direct from the hardware RNG + * (when available), use get_random_bytes_arch(). In order to ensure + * that the randomness provided by this function is okay, the function + * wait_for_random_bytes() should be called and return 0 at least once + * at any point prior. 
+ */ +void get_random_bytes(void *buf, size_t nbytes) +{ + static void *previous; + + warn_unseeded_randomness(&previous); + _get_random_bytes(buf, nbytes); +} +EXPORT_SYMBOL(get_random_bytes); + +static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) +{ + bool large_request = nbytes > 256; + ssize_t ret = 0; + size_t len; + u32 chacha_state[CHACHA_STATE_WORDS]; + u8 output[CHACHA_BLOCK_SIZE]; + + if (!nbytes) + return 0; + + len = min_t(size_t, 32, nbytes); + crng_make_state(chacha_state, output, len); + + if (copy_to_user(buf, output, len)) + return -EFAULT; + nbytes -= len; + buf += len; + ret += len; + + while (nbytes) { + if (large_request && need_resched()) { + if (signal_pending(current)) + break; + schedule(); + } + + chacha20_block(chacha_state, output); + if (unlikely(chacha_state[12] == 0)) + ++chacha_state[13]; + + len = min_t(size_t, nbytes, CHACHA_BLOCK_SIZE); + if (copy_to_user(buf, output, len)) { + ret = -EFAULT; + break; + } + + nbytes -= len; + buf += len; + ret += len; + } + + memzero_explicit(chacha_state, sizeof(chacha_state)); + memzero_explicit(output, sizeof(output)); + return ret; +} + +/* + * Batched entropy returns random integers. The quality of the random + * number is good as /dev/urandom. In order to ensure that the randomness + * provided by this function is okay, the function wait_for_random_bytes() + * should be called and return 0 at least once at any point prior. + */ +struct batched_entropy { + union { + /* + * We make this 1.5x a ChaCha block, so that we get the + * remaining 32 bytes from fast key erasure, plus one full + * block from the detached ChaCha state. We can increase + * the size of this later if needed so long as we keep the + * formula of (integer_blocks + 0.5) * CHACHA_BLOCK_SIZE. 
+ */ + u64 entropy_u64[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(u64))]; + u32 entropy_u32[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(u32))]; + }; + local_lock_t lock; + unsigned long generation; + unsigned int position; +}; + + +static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = { + .lock = INIT_LOCAL_LOCK(batched_entropy_u64.lock), + .position = UINT_MAX +}; + +u64 get_random_u64(void) +{ + u64 ret; + unsigned long flags; + struct batched_entropy *batch; + static void *previous; + unsigned long next_gen; + + warn_unseeded_randomness(&previous); + + local_lock_irqsave(&batched_entropy_u64.lock, flags); + batch = raw_cpu_ptr(&batched_entropy_u64); + + next_gen = READ_ONCE(base_crng.generation); + if (batch->position >= ARRAY_SIZE(batch->entropy_u64) || + next_gen != batch->generation) { + _get_random_bytes(batch->entropy_u64, sizeof(batch->entropy_u64)); + batch->position = 0; + batch->generation = next_gen; + } + + ret = batch->entropy_u64[batch->position]; + batch->entropy_u64[batch->position] = 0; + ++batch->position; + local_unlock_irqrestore(&batched_entropy_u64.lock, flags); + return ret; +} +EXPORT_SYMBOL(get_random_u64); + +static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32) = { + .lock = INIT_LOCAL_LOCK(batched_entropy_u32.lock), + .position = UINT_MAX +}; + +u32 get_random_u32(void) +{ + u32 ret; + unsigned long flags; + struct batched_entropy *batch; + static void *previous; + unsigned long next_gen; + + warn_unseeded_randomness(&previous); + + local_lock_irqsave(&batched_entropy_u32.lock, flags); + batch = raw_cpu_ptr(&batched_entropy_u32); + + next_gen = READ_ONCE(base_crng.generation); + if (batch->position >= ARRAY_SIZE(batch->entropy_u32) || + next_gen != batch->generation) { + _get_random_bytes(batch->entropy_u32, sizeof(batch->entropy_u32)); + batch->position = 0; + batch->generation = next_gen; + } + + ret = batch->entropy_u32[batch->position]; + batch->entropy_u32[batch->position] = 0; + ++batch->position; + 
local_unlock_irqrestore(&batched_entropy_u32.lock, flags); + return ret; +} +EXPORT_SYMBOL(get_random_u32); + +/** + * randomize_page - Generate a random, page aligned address + * @start: The smallest acceptable address the caller will take. + * @range: The size of the area, starting at @start, within which the + * random address must fall. + * + * If @start + @range would overflow, @range is capped. + * + * NOTE: Historical use of randomize_range, which this replaces, presumed that + * @start was already page aligned. We now align it regardless. + * + * Return: A page aligned address within [start, start + range). On error, + * @start is returned. + */ +unsigned long randomize_page(unsigned long start, unsigned long range) +{ + if (!PAGE_ALIGNED(start)) { + range -= PAGE_ALIGN(start) - start; + start = PAGE_ALIGN(start); + } + + if (start > ULONG_MAX - range) + range = ULONG_MAX - start; + + range >>= PAGE_SHIFT; + + if (range == 0) + return start; + + return start + (get_random_long() % range << PAGE_SHIFT); +} + +/* + * This function will use the architecture-specific hardware random + * number generator if it is available. It is not recommended for + * use. Use get_random_bytes() instead. It returns the number of + * bytes filled in. + */ +size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes) +{ + size_t left = nbytes; + u8 *p = buf; + + while (left) { + unsigned long v; + size_t chunk = min_t(size_t, left, sizeof(unsigned long)); + + if (!arch_get_random_long(&v)) + break; + + memcpy(p, &v, chunk); + p += chunk; + left -= chunk; + } + + return nbytes - left; +} +EXPORT_SYMBOL(get_random_bytes_arch); + +enum { + POOL_BITS = BLAKE2S_HASH_SIZE * 8, + POOL_MIN_BITS = POOL_BITS /* No point in settling for less. */ +}; + +/* + * Static global variables + */ +static DECLARE_WAIT_QUEUE_HEAD(random_write_wait); + +/********************************************************************** + * + * OS independent entropy store. 
Here are the functions which handle + * storing entropy in an entropy pool. + * + **********************************************************************/ + +static struct { + struct blake2s_state hash; + spinlock_t lock; + unsigned int entropy_count; +} input_pool = { + .hash.h = { BLAKE2S_IV0 ^ (0x01010000 | BLAKE2S_HASH_SIZE), + BLAKE2S_IV1, BLAKE2S_IV2, BLAKE2S_IV3, BLAKE2S_IV4, + BLAKE2S_IV5, BLAKE2S_IV6, BLAKE2S_IV7 }, + .hash.outlen = BLAKE2S_HASH_SIZE, + .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock), +}; + +static void extract_entropy(void *buf, size_t nbytes); +static bool drain_entropy(void *buf, size_t nbytes); + +static void crng_reseed(void); + +/* + * This function adds bytes into the entropy "pool". It does not + * update the entropy estimate. The caller should call + * credit_entropy_bits if this is appropriate. + */ +static void _mix_pool_bytes(const void *in, size_t nbytes) +{ + blake2s_update(&input_pool.hash, in, nbytes); +} + +static void mix_pool_bytes(const void *in, size_t nbytes) +{ + unsigned long flags; + + spin_lock_irqsave(&input_pool.lock, flags); + _mix_pool_bytes(in, nbytes); + spin_unlock_irqrestore(&input_pool.lock, flags); +} + +struct fast_pool { + union { + u32 pool32[4]; + u64 pool64[2]; + }; + unsigned long last; + u16 reg_idx; + u8 count; +}; + +/* + * This is a fast mixing routine used by the interrupt randomness + * collector. It's hardcoded for an 128 bit pool and assumes that any + * locks that might be needed are taken by the caller. 
+ */ +static void fast_mix(u32 pool[4]) +{ + u32 a = pool[0], b = pool[1]; + u32 c = pool[2], d = pool[3]; + + a += b; c += d; + b = rol32(b, 6); d = rol32(d, 27); + d ^= a; b ^= c; - if (!nbytes) - return 0; + a += b; c += d; + b = rol32(b, 16); d = rol32(d, 14); + d ^= a; b ^= c; - len = min_t(size_t, 32, nbytes); - crng_make_state(chacha_state, output, len); + a += b; c += d; + b = rol32(b, 6); d = rol32(d, 27); + d ^= a; b ^= c; - if (copy_to_user(buf, output, len)) - return -EFAULT; - nbytes -= len; - buf += len; - ret += len; + a += b; c += d; + b = rol32(b, 16); d = rol32(d, 14); + d ^= a; b ^= c; - while (nbytes) { - if (large_request && need_resched()) { - if (signal_pending(current)) - break; - schedule(); - } + pool[0] = a; pool[1] = b; + pool[2] = c; pool[3] = d; +} - chacha20_block(chacha_state, output); - if (unlikely(chacha_state[12] == 0)) - ++chacha_state[13]; +static void credit_entropy_bits(size_t nbits) +{ + unsigned int entropy_count, orig, add; - len = min_t(size_t, nbytes, CHACHA_BLOCK_SIZE); - if (copy_to_user(buf, output, len)) { - ret = -EFAULT; - break; - } + if (!nbits) + return; - nbytes -= len; - buf += len; - ret += len; - } + add = min_t(size_t, nbits, POOL_BITS); - memzero_explicit(chacha_state, sizeof(chacha_state)); - memzero_explicit(output, sizeof(output)); - return ret; + do { + orig = READ_ONCE(input_pool.entropy_count); + entropy_count = min_t(unsigned int, POOL_BITS, orig + add); + } while (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig); + + if (crng_init < 2 && entropy_count >= POOL_MIN_BITS) + crng_reseed(); } /********************************************************************* @@ -1044,57 +1273,6 @@ static bool drain_entropy(void *buf, size_t nbytes) return true; } -/* - * This function is the exported kernel interface. It returns some - * number of good random numbers, suitable for key generation, seeding - * TCP sequence numbers, etc. It does not rely on the hardware random - * number generator. 
For random bytes direct from the hardware RNG - * (when available), use get_random_bytes_arch(). In order to ensure - * that the randomness provided by this function is okay, the function - * wait_for_random_bytes() should be called and return 0 at least once - * at any point prior. - */ -static void _get_random_bytes(void *buf, size_t nbytes) -{ - u32 chacha_state[CHACHA_STATE_WORDS]; - u8 tmp[CHACHA_BLOCK_SIZE]; - size_t len; - - if (!nbytes) - return; - - len = min_t(size_t, 32, nbytes); - crng_make_state(chacha_state, buf, len); - nbytes -= len; - buf += len; - - while (nbytes) { - if (nbytes < CHACHA_BLOCK_SIZE) { - chacha20_block(chacha_state, tmp); - memcpy(buf, tmp, nbytes); - memzero_explicit(tmp, sizeof(tmp)); - break; - } - - chacha20_block(chacha_state, buf); - if (unlikely(chacha_state[12] == 0)) - ++chacha_state[13]; - nbytes -= CHACHA_BLOCK_SIZE; - buf += CHACHA_BLOCK_SIZE; - } - - memzero_explicit(chacha_state, sizeof(chacha_state)); -} - -void get_random_bytes(void *buf, size_t nbytes) -{ - static void *previous; - - warn_unseeded_randomness(&previous); - _get_random_bytes(buf, nbytes); -} -EXPORT_SYMBOL(get_random_bytes); - /* * Each time the timer fires, we expect that we got an unpredictable * jump in the cycle counter. Even if the timer is running on another @@ -1144,33 +1322,6 @@ static void try_to_generate_entropy(void) mix_pool_bytes(&stack.now, sizeof(stack.now)); } -/* - * This function will use the architecture-specific hardware random - * number generator if it is available. It is not recommended for - * use. Use get_random_bytes() instead. It returns the number of - * bytes filled in. 
- */ -size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes) -{ - size_t left = nbytes; - u8 *p = buf; - - while (left) { - unsigned long v; - size_t chunk = min_t(size_t, left, sizeof(unsigned long)); - - if (!arch_get_random_long(&v)) - break; - - memcpy(p, &v, chunk); - p += chunk; - left -= chunk; - } - - return nbytes - left; -} -EXPORT_SYMBOL(get_random_bytes_arch); - static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU); static int __init parse_trust_cpu(char *arg) { @@ -1533,129 +1684,6 @@ static int __init random_sysctls_init(void) device_initcall(random_sysctls_init); #endif /* CONFIG_SYSCTL */ -struct batched_entropy { - union { - /* - * We make this 1.5x a ChaCha block, so that we get the - * remaining 32 bytes from fast key erasure, plus one full - * block from the detached ChaCha state. We can increase - * the size of this later if needed so long as we keep the - * formula of (integer_blocks + 0.5) * CHACHA_BLOCK_SIZE. - */ - u64 entropy_u64[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(u64))]; - u32 entropy_u32[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(u32))]; - }; - local_lock_t lock; - unsigned long generation; - unsigned int position; -}; - -/* - * Get a random word for internal kernel use only. The quality of the random - * number is good as /dev/urandom. In order to ensure that the randomness - * provided by this function is okay, the function wait_for_random_bytes() - * should be called and return 0 at least once at any point prior. 
- */ -static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = { - .lock = INIT_LOCAL_LOCK(batched_entropy_u64.lock), - .position = UINT_MAX -}; - -u64 get_random_u64(void) -{ - u64 ret; - unsigned long flags; - struct batched_entropy *batch; - static void *previous; - unsigned long next_gen; - - warn_unseeded_randomness(&previous); - - local_lock_irqsave(&batched_entropy_u64.lock, flags); - batch = raw_cpu_ptr(&batched_entropy_u64); - - next_gen = READ_ONCE(base_crng.generation); - if (batch->position >= ARRAY_SIZE(batch->entropy_u64) || - next_gen != batch->generation) { - _get_random_bytes(batch->entropy_u64, sizeof(batch->entropy_u64)); - batch->position = 0; - batch->generation = next_gen; - } - - ret = batch->entropy_u64[batch->position]; - batch->entropy_u64[batch->position] = 0; - ++batch->position; - local_unlock_irqrestore(&batched_entropy_u64.lock, flags); - return ret; -} -EXPORT_SYMBOL(get_random_u64); - -static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32) = { - .lock = INIT_LOCAL_LOCK(batched_entropy_u32.lock), - .position = UINT_MAX -}; - -u32 get_random_u32(void) -{ - u32 ret; - unsigned long flags; - struct batched_entropy *batch; - static void *previous; - unsigned long next_gen; - - warn_unseeded_randomness(&previous); - - local_lock_irqsave(&batched_entropy_u32.lock, flags); - batch = raw_cpu_ptr(&batched_entropy_u32); - - next_gen = READ_ONCE(base_crng.generation); - if (batch->position >= ARRAY_SIZE(batch->entropy_u32) || - next_gen != batch->generation) { - _get_random_bytes(batch->entropy_u32, sizeof(batch->entropy_u32)); - batch->position = 0; - batch->generation = next_gen; - } - - ret = batch->entropy_u32[batch->position]; - batch->entropy_u32[batch->position] = 0; - ++batch->position; - local_unlock_irqrestore(&batched_entropy_u32.lock, flags); - return ret; -} -EXPORT_SYMBOL(get_random_u32); - -/** - * randomize_page - Generate a random, page aligned address - * @start: The smallest acceptable address the 
caller will take. - * @range: The size of the area, starting at @start, within which the - * random address must fall. - * - * If @start + @range would overflow, @range is capped. - * - * NOTE: Historical use of randomize_range, which this replaces, presumed that - * @start was already page aligned. We now align it regardless. - * - * Return: A page aligned address within [start, start + range). On error, - * @start is returned. - */ -unsigned long randomize_page(unsigned long start, unsigned long range) -{ - if (!PAGE_ALIGNED(start)) { - range -= PAGE_ALIGN(start) - start; - start = PAGE_ALIGN(start); - } - - if (start > ULONG_MAX - range) - range = ULONG_MAX - start; - - range >>= PAGE_SHIFT; - - if (range == 0) - return start; - - return start + (get_random_long() % range << PAGE_SHIFT); -} - /* Interface for in-kernel drivers of true hardware RNGs. * Those devices may produce endless random bits and will be throttled * when our pool is full. -- GitLab From a5ed7cb1a7732ef11959332d507889fbc39ebbb4 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 11 Feb 2022 12:53:34 +0100 Subject: [PATCH 0554/1586] random: group entropy extraction functions This pulls all of the entropy extraction-focused functions into the third labeled section. No functional changes. Cc: Theodore Ts'o Reviewed-by: Eric Biggers Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 216 +++++++++++++++++++++--------------------- 1 file changed, 109 insertions(+), 107 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 99b55e76b0043..ce530a44b4d07 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -895,23 +895,36 @@ size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes) } EXPORT_SYMBOL(get_random_bytes_arch); + +/********************************************************************** + * + * Entropy accumulation and extraction routines. 
+ * + * Callers may add entropy via: + * + * static void mix_pool_bytes(const void *in, size_t nbytes) + * + * After which, if added entropy should be credited: + * + * static void credit_entropy_bits(size_t nbits) + * + * Finally, extract entropy via these two, with the latter one + * setting the entropy count to zero and extracting only if there + * is POOL_MIN_BITS entropy credited prior: + * + * static void extract_entropy(void *buf, size_t nbytes) + * static bool drain_entropy(void *buf, size_t nbytes) + * + **********************************************************************/ + enum { POOL_BITS = BLAKE2S_HASH_SIZE * 8, POOL_MIN_BITS = POOL_BITS /* No point in settling for less. */ }; -/* - * Static global variables - */ +/* For notifying userspace should write into /dev/random. */ static DECLARE_WAIT_QUEUE_HEAD(random_write_wait); -/********************************************************************** - * - * OS independent entropy store. Here are the functions which handle - * storing entropy in an entropy pool. - * - **********************************************************************/ - static struct { struct blake2s_state hash; spinlock_t lock; @@ -924,28 +937,106 @@ static struct { .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock), }; -static void extract_entropy(void *buf, size_t nbytes); -static bool drain_entropy(void *buf, size_t nbytes); - -static void crng_reseed(void); +static void _mix_pool_bytes(const void *in, size_t nbytes) +{ + blake2s_update(&input_pool.hash, in, nbytes); +} /* * This function adds bytes into the entropy "pool". It does not * update the entropy estimate. The caller should call * credit_entropy_bits if this is appropriate. 
*/ -static void _mix_pool_bytes(const void *in, size_t nbytes) +static void mix_pool_bytes(const void *in, size_t nbytes) { - blake2s_update(&input_pool.hash, in, nbytes); + unsigned long flags; + + spin_lock_irqsave(&input_pool.lock, flags); + _mix_pool_bytes(in, nbytes); + spin_unlock_irqrestore(&input_pool.lock, flags); } -static void mix_pool_bytes(const void *in, size_t nbytes) +static void credit_entropy_bits(size_t nbits) +{ + unsigned int entropy_count, orig, add; + + if (!nbits) + return; + + add = min_t(size_t, nbits, POOL_BITS); + + do { + orig = READ_ONCE(input_pool.entropy_count); + entropy_count = min_t(unsigned int, POOL_BITS, orig + add); + } while (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig); + + if (crng_init < 2 && entropy_count >= POOL_MIN_BITS) + crng_reseed(); +} + +/* + * This is an HKDF-like construction for using the hashed collected entropy + * as a PRF key, that's then expanded block-by-block. + */ +static void extract_entropy(void *buf, size_t nbytes) { unsigned long flags; + u8 seed[BLAKE2S_HASH_SIZE], next_key[BLAKE2S_HASH_SIZE]; + struct { + unsigned long rdseed[32 / sizeof(long)]; + size_t counter; + } block; + size_t i; + + for (i = 0; i < ARRAY_SIZE(block.rdseed); ++i) { + if (!arch_get_random_seed_long(&block.rdseed[i]) && + !arch_get_random_long(&block.rdseed[i])) + block.rdseed[i] = random_get_entropy(); + } spin_lock_irqsave(&input_pool.lock, flags); - _mix_pool_bytes(in, nbytes); + + /* seed = HASHPRF(last_key, entropy_input) */ + blake2s_final(&input_pool.hash, seed); + + /* next_key = HASHPRF(seed, RDSEED || 0) */ + block.counter = 0; + blake2s(next_key, (u8 *)&block, seed, sizeof(next_key), sizeof(block), sizeof(seed)); + blake2s_init_key(&input_pool.hash, BLAKE2S_HASH_SIZE, next_key, sizeof(next_key)); + spin_unlock_irqrestore(&input_pool.lock, flags); + memzero_explicit(next_key, sizeof(next_key)); + + while (nbytes) { + i = min_t(size_t, nbytes, BLAKE2S_HASH_SIZE); + /* output = HASHPRF(seed, RDSEED 
|| ++counter) */ + ++block.counter; + blake2s(buf, (u8 *)&block, seed, i, sizeof(block), sizeof(seed)); + nbytes -= i; + buf += i; + } + + memzero_explicit(seed, sizeof(seed)); + memzero_explicit(&block, sizeof(block)); +} + +/* + * First we make sure we have POOL_MIN_BITS of entropy in the pool, and then we + * set the entropy count to zero (but don't actually touch any data). Only then + * can we extract a new key with extract_entropy(). + */ +static bool drain_entropy(void *buf, size_t nbytes) +{ + unsigned int entropy_count; + do { + entropy_count = READ_ONCE(input_pool.entropy_count); + if (entropy_count < POOL_MIN_BITS) + return false; + } while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count); + extract_entropy(buf, nbytes); + wake_up_interruptible(&random_write_wait); + kill_fasync(&fasync, SIGIO, POLL_OUT); + return true; } struct fast_pool { @@ -988,24 +1079,6 @@ static void fast_mix(u32 pool[4]) pool[2] = c; pool[3] = d; } -static void credit_entropy_bits(size_t nbits) -{ - unsigned int entropy_count, orig, add; - - if (!nbits) - return; - - add = min_t(size_t, nbits, POOL_BITS); - - do { - orig = READ_ONCE(input_pool.entropy_count); - entropy_count = min_t(unsigned int, POOL_BITS, orig + add); - } while (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig); - - if (crng_init < 2 && entropy_count >= POOL_MIN_BITS) - crng_reseed(); -} - /********************************************************************* * * Entropy input management @@ -1202,77 +1275,6 @@ void add_disk_randomness(struct gendisk *disk) EXPORT_SYMBOL_GPL(add_disk_randomness); #endif -/********************************************************************* - * - * Entropy extraction routines - * - *********************************************************************/ - -/* - * This is an HKDF-like construction for using the hashed collected entropy - * as a PRF key, that's then expanded block-by-block. 
- */ -static void extract_entropy(void *buf, size_t nbytes) -{ - unsigned long flags; - u8 seed[BLAKE2S_HASH_SIZE], next_key[BLAKE2S_HASH_SIZE]; - struct { - unsigned long rdseed[32 / sizeof(long)]; - size_t counter; - } block; - size_t i; - - for (i = 0; i < ARRAY_SIZE(block.rdseed); ++i) { - if (!arch_get_random_seed_long(&block.rdseed[i]) && - !arch_get_random_long(&block.rdseed[i])) - block.rdseed[i] = random_get_entropy(); - } - - spin_lock_irqsave(&input_pool.lock, flags); - - /* seed = HASHPRF(last_key, entropy_input) */ - blake2s_final(&input_pool.hash, seed); - - /* next_key = HASHPRF(seed, RDSEED || 0) */ - block.counter = 0; - blake2s(next_key, (u8 *)&block, seed, sizeof(next_key), sizeof(block), sizeof(seed)); - blake2s_init_key(&input_pool.hash, BLAKE2S_HASH_SIZE, next_key, sizeof(next_key)); - - spin_unlock_irqrestore(&input_pool.lock, flags); - memzero_explicit(next_key, sizeof(next_key)); - - while (nbytes) { - i = min_t(size_t, nbytes, BLAKE2S_HASH_SIZE); - /* output = HASHPRF(seed, RDSEED || ++counter) */ - ++block.counter; - blake2s(buf, (u8 *)&block, seed, i, sizeof(block), sizeof(seed)); - nbytes -= i; - buf += i; - } - - memzero_explicit(seed, sizeof(seed)); - memzero_explicit(&block, sizeof(block)); -} - -/* - * First we make sure we have POOL_MIN_BITS of entropy in the pool, and then we - * set the entropy count to zero (but don't actually touch any data). Only then - * can we extract a new key with extract_entropy(). - */ -static bool drain_entropy(void *buf, size_t nbytes) -{ - unsigned int entropy_count; - do { - entropy_count = READ_ONCE(input_pool.entropy_count); - if (entropy_count < POOL_MIN_BITS) - return false; - } while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count); - extract_entropy(buf, nbytes); - wake_up_interruptible(&random_write_wait); - kill_fasync(&fasync, SIGIO, POLL_OUT); - return true; -} - /* * Each time the timer fires, we expect that we got an unpredictable * jump in the cycle counter. 
Even if the timer is running on another -- GitLab From 92c653cf14400946f376a29b828d6af7e01f38dd Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 11 Feb 2022 12:53:34 +0100 Subject: [PATCH 0555/1586] random: group entropy collection functions This pulls all of the entropy collection-focused functions into the fourth labeled section. No functional changes. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 370 +++++++++++++++++++++++------------------- 1 file changed, 206 insertions(+), 164 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index ce530a44b4d07..75e16e2f07e45 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1039,60 +1039,112 @@ static bool drain_entropy(void *buf, size_t nbytes) return true; } -struct fast_pool { - union { - u32 pool32[4]; - u64 pool64[2]; - }; - unsigned long last; - u16 reg_idx; - u8 count; -}; + +/********************************************************************** + * + * Entropy collection routines. + * + * The following exported functions are used for pushing entropy into + * the above entropy accumulation routines: + * + * void add_device_randomness(const void *buf, size_t size); + * void add_input_randomness(unsigned int type, unsigned int code, + * unsigned int value); + * void add_disk_randomness(struct gendisk *disk); + * void add_hwgenerator_randomness(const void *buffer, size_t count, + * size_t entropy); + * void add_bootloader_randomness(const void *buf, size_t size); + * void add_interrupt_randomness(int irq); + * + * add_device_randomness() adds data to the input pool that + * is likely to differ between two devices (or possibly even per boot). + * This would be things like MAC addresses or serial numbers, or the + * read-out of the RTC. 
This does *not* credit any actual entropy to + * the pool, but it initializes the pool to different values for devices + * that might otherwise be identical and have very little entropy + * available to them (particularly common in the embedded world). + * + * add_input_randomness() uses the input layer interrupt timing, as well + * as the event type information from the hardware. + * + * add_disk_randomness() uses what amounts to the seek time of block + * layer request events, on a per-disk_devt basis, as input to the + * entropy pool. Note that high-speed solid state drives with very low + * seek times do not make for good sources of entropy, as their seek + * times are usually fairly consistent. + * + * The above two routines try to estimate how many bits of entropy + * to credit. They do this by keeping track of the first and second + * order deltas of the event timings. + * + * add_hwgenerator_randomness() is for true hardware RNGs, and will credit + * entropy as specified by the caller. If the entropy pool is full it will + * block until more entropy is needed. + * + * add_bootloader_randomness() is the same as add_hwgenerator_randomness() or + * add_device_randomness(), depending on whether or not the configuration + * option CONFIG_RANDOM_TRUST_BOOTLOADER is set. + * + * add_interrupt_randomness() uses the interrupt timing as random + * inputs to the entropy pool. Using the cycle counters and the irq source + * as inputs, it feeds the input pool roughly once a second or after 64 + * interrupts, crediting 1 bit of entropy for whichever comes first. + * + **********************************************************************/ + +static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU); +static int __init parse_trust_cpu(char *arg) +{ + return kstrtobool(arg, &trust_cpu); +} +early_param("random.trust_cpu", parse_trust_cpu); /* - * This is a fast mixing routine used by the interrupt randomness - * collector. 
It's hardcoded for an 128 bit pool and assumes that any - * locks that might be needed are taken by the caller. + * The first collection of entropy occurs at system boot while interrupts + * are still turned off. Here we push in RDSEED, a timestamp, and utsname(). + * Depending on the above configuration knob, RDSEED may be considered + * sufficient for initialization. Note that much earlier setup may already + * have pushed entropy into the input pool by the time we get here. */ -static void fast_mix(u32 pool[4]) +int __init rand_initialize(void) { - u32 a = pool[0], b = pool[1]; - u32 c = pool[2], d = pool[3]; - - a += b; c += d; - b = rol32(b, 6); d = rol32(d, 27); - d ^= a; b ^= c; + size_t i; + ktime_t now = ktime_get_real(); + bool arch_init = true; + unsigned long rv; - a += b; c += d; - b = rol32(b, 16); d = rol32(d, 14); - d ^= a; b ^= c; + for (i = 0; i < BLAKE2S_BLOCK_SIZE; i += sizeof(rv)) { + if (!arch_get_random_seed_long_early(&rv) && + !arch_get_random_long_early(&rv)) { + rv = random_get_entropy(); + arch_init = false; + } + mix_pool_bytes(&rv, sizeof(rv)); + } + mix_pool_bytes(&now, sizeof(now)); + mix_pool_bytes(utsname(), sizeof(*(utsname()))); - a += b; c += d; - b = rol32(b, 6); d = rol32(d, 27); - d ^= a; b ^= c; + extract_entropy(base_crng.key, sizeof(base_crng.key)); + ++base_crng.generation; - a += b; c += d; - b = rol32(b, 16); d = rol32(d, 14); - d ^= a; b ^= c; + if (arch_init && trust_cpu && crng_init < 2) { + crng_init = 2; + pr_notice("crng init done (trusting CPU's manufacturer)\n"); + } - pool[0] = a; pool[1] = b; - pool[2] = c; pool[3] = d; + if (ratelimit_disable) { + urandom_warning.interval = 0; + unseeded_warning.interval = 0; + } + return 0; } -/********************************************************************* - * - * Entropy input management - * - *********************************************************************/ - /* There is one of these per entropy source */ struct timer_rand_state { cycles_t last_time; long 
last_delta, last_delta2; }; -#define INIT_TIMER_RAND_STATE { INITIAL_JIFFIES, }; - /* * Add device- or boot-specific data to the input pool to help * initialize it. @@ -1116,8 +1168,6 @@ void add_device_randomness(const void *buf, size_t size) } EXPORT_SYMBOL(add_device_randomness); -static struct timer_rand_state input_timer_state = INIT_TIMER_RAND_STATE; - /* * This function adds entropy to the entropy "pool" by using timing * delays. It uses the timer_rand_state structure to make an estimate @@ -1179,8 +1229,9 @@ void add_input_randomness(unsigned int type, unsigned int code, unsigned int value) { static unsigned char last_value; + static struct timer_rand_state input_timer_state = { INITIAL_JIFFIES }; - /* ignore autorepeat and the like */ + /* Ignore autorepeat and the like. */ if (value == last_value) return; @@ -1190,6 +1241,119 @@ void add_input_randomness(unsigned int type, unsigned int code, } EXPORT_SYMBOL_GPL(add_input_randomness); +#ifdef CONFIG_BLOCK +void add_disk_randomness(struct gendisk *disk) +{ + if (!disk || !disk->random) + return; + /* First major is 1, so we get >= 0x200 here. */ + add_timer_randomness(disk->random, 0x100 + disk_devt(disk)); +} +EXPORT_SYMBOL_GPL(add_disk_randomness); + +void rand_initialize_disk(struct gendisk *disk) +{ + struct timer_rand_state *state; + + /* + * If kzalloc returns null, we just won't use that entropy + * source. + */ + state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL); + if (state) { + state->last_time = INITIAL_JIFFIES; + disk->random = state; + } +} +#endif + +/* + * Interface for in-kernel drivers of true hardware RNGs. + * Those devices may produce endless random bits and will be throttled + * when our pool is full. 
+ */ +void add_hwgenerator_randomness(const void *buffer, size_t count, + size_t entropy) +{ + if (unlikely(crng_init == 0)) { + size_t ret = crng_fast_load(buffer, count); + mix_pool_bytes(buffer, ret); + count -= ret; + buffer += ret; + if (!count || crng_init == 0) + return; + } + + /* + * Throttle writing if we're above the trickle threshold. + * We'll be woken up again once below POOL_MIN_BITS, when + * the calling thread is about to terminate, or once + * CRNG_RESEED_INTERVAL has elapsed. + */ + wait_event_interruptible_timeout(random_write_wait, + !system_wq || kthread_should_stop() || + input_pool.entropy_count < POOL_MIN_BITS, + CRNG_RESEED_INTERVAL); + mix_pool_bytes(buffer, count); + credit_entropy_bits(entropy); +} +EXPORT_SYMBOL_GPL(add_hwgenerator_randomness); + +/* + * Handle random seed passed by bootloader. + * If the seed is trustworthy, it would be regarded as hardware RNGs. Otherwise + * it would be regarded as device data. + * The decision is controlled by CONFIG_RANDOM_TRUST_BOOTLOADER. + */ +void add_bootloader_randomness(const void *buf, size_t size) +{ + if (IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER)) + add_hwgenerator_randomness(buf, size, size * 8); + else + add_device_randomness(buf, size); +} +EXPORT_SYMBOL_GPL(add_bootloader_randomness); + +struct fast_pool { + union { + u32 pool32[4]; + u64 pool64[2]; + }; + unsigned long last; + u16 reg_idx; + u8 count; +}; + +/* + * This is a fast mixing routine used by the interrupt randomness + * collector. It's hardcoded for an 128 bit pool and assumes that any + * locks that might be needed are taken by the caller. 
+ */ +static void fast_mix(u32 pool[4]) +{ + u32 a = pool[0], b = pool[1]; + u32 c = pool[2], d = pool[3]; + + a += b; c += d; + b = rol32(b, 6); d = rol32(d, 27); + d ^= a; b ^= c; + + a += b; c += d; + b = rol32(b, 16); d = rol32(d, 14); + d ^= a; b ^= c; + + a += b; c += d; + b = rol32(b, 6); d = rol32(d, 27); + d ^= a; b ^= c; + + a += b; c += d; + b = rol32(b, 16); d = rol32(d, 14); + d ^= a; b ^= c; + + pool[0] = a; pool[1] = b; + pool[2] = c; pool[3] = d; +} + static DEFINE_PER_CPU(struct fast_pool, irq_randomness); static u32 get_reg(struct fast_pool *f, struct pt_regs *regs) @@ -1259,22 +1423,11 @@ void add_interrupt_randomness(int irq) fast_pool->count = 0; - /* award one bit for the contents of the fast pool */ + /* Award one bit for the contents of the fast pool. */ credit_entropy_bits(1); } EXPORT_SYMBOL_GPL(add_interrupt_randomness); -#ifdef CONFIG_BLOCK -void add_disk_randomness(struct gendisk *disk) -{ - if (!disk || !disk->random) - return; - /* first major is 1, so we get >= 0x200 here */ - add_timer_randomness(disk->random, 0x100 + disk_devt(disk)); -} -EXPORT_SYMBOL_GPL(add_disk_randomness); -#endif - /* * Each time the timer fires, we expect that we got an unpredictable * jump in the cycle counter. Even if the timer is running on another @@ -1324,73 +1477,6 @@ static void try_to_generate_entropy(void) mix_pool_bytes(&stack.now, sizeof(stack.now)); } -static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU); -static int __init parse_trust_cpu(char *arg) -{ - return kstrtobool(arg, &trust_cpu); -} -early_param("random.trust_cpu", parse_trust_cpu); - -/* - * Note that setup_arch() may call add_device_randomness() - * long before we get here. This allows seeding of the pools - * with some platform dependent data very early in the boot - * process. But it limits our options here. We must use - * statically allocated structures that already have all - * initializations complete at compile time. 
We should also - * take care not to overwrite the precious per platform data - * we were given. - */ -int __init rand_initialize(void) -{ - size_t i; - ktime_t now = ktime_get_real(); - bool arch_init = true; - unsigned long rv; - - for (i = 0; i < BLAKE2S_BLOCK_SIZE; i += sizeof(rv)) { - if (!arch_get_random_seed_long_early(&rv) && - !arch_get_random_long_early(&rv)) { - rv = random_get_entropy(); - arch_init = false; - } - mix_pool_bytes(&rv, sizeof(rv)); - } - mix_pool_bytes(&now, sizeof(now)); - mix_pool_bytes(utsname(), sizeof(*(utsname()))); - - extract_entropy(base_crng.key, sizeof(base_crng.key)); - ++base_crng.generation; - - if (arch_init && trust_cpu && crng_init < 2) { - crng_init = 2; - pr_notice("crng init done (trusting CPU's manufacturer)\n"); - } - - if (ratelimit_disable) { - urandom_warning.interval = 0; - unseeded_warning.interval = 0; - } - return 0; -} - -#ifdef CONFIG_BLOCK -void rand_initialize_disk(struct gendisk *disk) -{ - struct timer_rand_state *state; - - /* - * If kzalloc returns null, we just won't use that entropy - * source. - */ - state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL); - if (state) { - state->last_time = INITIAL_JIFFIES; - disk->random = state; - } -} -#endif - static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { @@ -1685,47 +1771,3 @@ static int __init random_sysctls_init(void) } device_initcall(random_sysctls_init); #endif /* CONFIG_SYSCTL */ - -/* Interface for in-kernel drivers of true hardware RNGs. - * Those devices may produce endless random bits and will be throttled - * when our pool is full. - */ -void add_hwgenerator_randomness(const void *buffer, size_t count, - size_t entropy) -{ - if (unlikely(crng_init == 0)) { - size_t ret = crng_fast_load(buffer, count); - mix_pool_bytes(buffer, ret); - count -= ret; - buffer += ret; - if (!count || crng_init == 0) - return; - } - - /* Throttle writing if we're above the trickle threshold. 
- * We'll be woken up again once below POOL_MIN_BITS, when - * the calling thread is about to terminate, or once - * CRNG_RESEED_INTERVAL has elapsed. - */ - wait_event_interruptible_timeout(random_write_wait, - !system_wq || kthread_should_stop() || - input_pool.entropy_count < POOL_MIN_BITS, - CRNG_RESEED_INTERVAL); - mix_pool_bytes(buffer, count); - credit_entropy_bits(entropy); -} -EXPORT_SYMBOL_GPL(add_hwgenerator_randomness); - -/* Handle random seed passed by bootloader. - * If the seed is trustworthy, it would be regarded as hardware RNGs. Otherwise - * it would be regarded as device data. - * The decision is controlled by CONFIG_RANDOM_TRUST_BOOTLOADER. - */ -void add_bootloader_randomness(const void *buf, size_t size) -{ - if (IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER)) - add_hwgenerator_randomness(buf, size, size * 8); - else - add_device_randomness(buf, size); -} -EXPORT_SYMBOL_GPL(add_bootloader_randomness); -- GitLab From a6adf8e7a605250b911e94793fd077933709ff9e Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 11 Feb 2022 12:53:34 +0100 Subject: [PATCH 0556/1586] random: group userspace read/write functions This pulls all of the userspace read/write-focused functions into the fifth labeled section. No functional changes. Cc: Theodore Ts'o Reviewed-by: Eric Biggers Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 125 ++++++++++++++++++++++++++---------------- 1 file changed, 77 insertions(+), 48 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 75e16e2f07e45..f43d12354c8d5 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1477,30 +1477,61 @@ static void try_to_generate_entropy(void) mix_pool_bytes(&stack.now, sizeof(stack.now)); } -static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes, - loff_t *ppos) + +/********************************************************************** + * + * Userspace reader/writer interfaces. 
+ * + * getrandom(2) is the primary modern interface into the RNG and should + * be used in preference to anything else. + * + * Reading from /dev/random has the same functionality as calling + * getrandom(2) with flags=0. In earlier versions, however, it had + * vastly different semantics and should therefore be avoided, to + * prevent backwards compatibility issues. + * + * Reading from /dev/urandom has the same functionality as calling + * getrandom(2) with flags=GRND_INSECURE. Because it does not block + * waiting for the RNG to be ready, it should not be used. + * + * Writing to either /dev/random or /dev/urandom adds entropy to + * the input pool but does not credit it. + * + * Polling on /dev/random indicates when the RNG is initialized, on + * the read side, and when it wants new entropy, on the write side. + * + * Both /dev/random and /dev/urandom have the same set of ioctls for + * adding entropy, getting the entropy count, zeroing the count, and + * reseeding the crng. + * + **********************************************************************/ + +SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int, + flags) { - static int maxwarn = 10; + if (flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE)) + return -EINVAL; - if (!crng_ready() && maxwarn > 0) { - maxwarn--; - if (__ratelimit(&urandom_warning)) - pr_notice("%s: uninitialized urandom read (%zd bytes read)\n", - current->comm, nbytes); - } + /* + * Requesting insecure and blocking randomness at the same time makes + * no sense. 
+ */ + if ((flags & (GRND_INSECURE | GRND_RANDOM)) == (GRND_INSECURE | GRND_RANDOM)) + return -EINVAL; - return get_random_bytes_user(buf, nbytes); -} + if (count > INT_MAX) + count = INT_MAX; -static ssize_t random_read(struct file *file, char __user *buf, size_t nbytes, - loff_t *ppos) -{ - int ret; + if (!(flags & GRND_INSECURE) && !crng_ready()) { + int ret; - ret = wait_for_random_bytes(); - if (ret != 0) - return ret; - return get_random_bytes_user(buf, nbytes); + if (flags & GRND_NONBLOCK) + return -EAGAIN; + ret = wait_for_random_bytes(); + if (unlikely(ret)) + return ret; + } + return get_random_bytes_user(buf, count); } static __poll_t random_poll(struct file *file, poll_table *wait) @@ -1552,6 +1583,32 @@ static ssize_t random_write(struct file *file, const char __user *buffer, return (ssize_t)count; } +static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes, + loff_t *ppos) +{ + static int maxwarn = 10; + + if (!crng_ready() && maxwarn > 0) { + maxwarn--; + if (__ratelimit(&urandom_warning)) + pr_notice("%s: uninitialized urandom read (%zd bytes read)\n", + current->comm, nbytes); + } + + return get_random_bytes_user(buf, nbytes); +} + +static ssize_t random_read(struct file *file, char __user *buf, size_t nbytes, + loff_t *ppos) +{ + int ret; + + ret = wait_for_random_bytes(); + if (ret != 0) + return ret; + return get_random_bytes_user(buf, nbytes); +} + static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) { int size, ent_count; @@ -1560,7 +1617,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) switch (cmd) { case RNDGETENTCNT: - /* inherently racy, no point locking */ + /* Inherently racy, no point locking. 
*/ if (put_user(input_pool.entropy_count, p)) return -EFAULT; return 0; @@ -1636,34 +1693,6 @@ const struct file_operations urandom_fops = { .llseek = noop_llseek, }; -SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int, - flags) -{ - if (flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE)) - return -EINVAL; - - /* - * Requesting insecure and blocking randomness at the same time makes - * no sense. - */ - if ((flags & (GRND_INSECURE | GRND_RANDOM)) == (GRND_INSECURE | GRND_RANDOM)) - return -EINVAL; - - if (count > INT_MAX) - count = INT_MAX; - - if (!(flags & GRND_INSECURE) && !crng_ready()) { - int ret; - - if (flags & GRND_NONBLOCK) - return -EAGAIN; - ret = wait_for_random_bytes(); - if (unlikely(ret)) - return ret; - } - return get_random_bytes_user(buf, count); -} - /******************************************************************** * * Sysctl interface -- GitLab From 0deff3c43206c24e746b1410f11125707ad3040e Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 11 Feb 2022 12:53:34 +0100 Subject: [PATCH 0557/1586] random: group sysctl functions This pulls all of the sysctl-focused functions into the sixth labeled section. No functional changes. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 37 +++++++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index f43d12354c8d5..a4b442032db75 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1693,9 +1693,34 @@ const struct file_operations urandom_fops = { .llseek = noop_llseek, }; + /******************************************************************** * - * Sysctl interface + * Sysctl interface. + * + * These are partly unused legacy knobs with dummy values to not break + * userspace and partly still useful things. 
They are usually accessible + * in /proc/sys/kernel/random/ and are as follows: + * + * - boot_id - a UUID representing the current boot. + * + * - uuid - a random UUID, different each time the file is read. + * + * - poolsize - the number of bits of entropy that the input pool can + * hold, tied to the POOL_BITS constant. + * + * - entropy_avail - the number of bits of entropy currently in the + * input pool. Always <= poolsize. + * + * - write_wakeup_threshold - the amount of entropy in the input pool + * below which write polls to /dev/random will unblock, requesting + * more entropy, tied to the POOL_MIN_BITS constant. It is writable + * to avoid breaking old userspaces, but writing to it does not + * change any behavior of the RNG. + * + * - urandom_min_reseed_secs - fixed to the meaningless value "60". + * It is writable to avoid breaking old userspaces, but writing + * to it does not change any behavior of the RNG. * ********************************************************************/ @@ -1703,8 +1728,8 @@ const struct file_operations urandom_fops = { #include -static int random_min_urandom_seed = 60; -static int random_write_wakeup_bits = POOL_MIN_BITS; +static int sysctl_random_min_urandom_seed = 60; +static int sysctl_random_write_wakeup_bits = POOL_MIN_BITS; static int sysctl_poolsize = POOL_BITS; static char sysctl_bootid[16]; @@ -1761,14 +1786,14 @@ static struct ctl_table random_table[] = { }, { .procname = "write_wakeup_threshold", - .data = &random_write_wakeup_bits, + .data = &sysctl_random_write_wakeup_bits, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "urandom_min_reseed_secs", - .data = &random_min_urandom_seed, + .data = &sysctl_random_min_urandom_seed, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, @@ -1799,4 +1824,4 @@ static int __init random_sysctls_init(void) return 0; } device_initcall(random_sysctls_init); -#endif /* CONFIG_SYSCTL */ +#endif -- GitLab From 
5f75d9f3babea8ae0a2d06724656874f41d317f5 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 11 Feb 2022 12:29:33 +0100 Subject: [PATCH 0558/1586] random: rewrite header introductory comment Now that we've re-documented the various sections, we can remove the outdated text here and replace it with a high-level overview. Cc: Theodore Ts'o Reviewed-by: Eric Biggers Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 183 +++++------------------------------------- 1 file changed, 21 insertions(+), 162 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index a4b442032db75..e0764e8407911 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -2,168 +2,27 @@ /* * Copyright (C) 2017-2022 Jason A. Donenfeld . All Rights Reserved. * Copyright Matt Mackall , 2003, 2004, 2005 - * Copyright Theodore Ts'o, 1994, 1995, 1996, 1997, 1998, 1999. All - * rights reserved. - */ - -/* - * Exported interfaces ---- output - * =============================== - * - * There are four exported interfaces; two for use within the kernel, - * and two for use from userspace. - * - * Exported interfaces ---- userspace output - * ----------------------------------------- - * - * The userspace interfaces are two character devices /dev/random and - * /dev/urandom. /dev/random is suitable for use when very high - * quality randomness is desired (for example, for key generation or - * one-time pads), as it will only return a maximum of the number of - * bits of randomness (as estimated by the random number generator) - * contained in the entropy pool. - * - * The /dev/urandom device does not have this limit, and will return - * as many bytes as are requested. As more and more random bytes are - * requested without giving time for the entropy pool to recharge, - * this will result in random numbers that are merely cryptographically - * strong. For many applications, however, this is acceptable. 
- * - * Exported interfaces ---- kernel output - * -------------------------------------- - * - * The primary kernel interfaces are: - * - * void get_random_bytes(void *buf, size_t nbytes); - * u32 get_random_u32() - * u64 get_random_u64() - * unsigned int get_random_int() - * unsigned long get_random_long() - * - * These interfaces will return the requested number of random bytes - * into the given buffer or as a return value. This is equivalent to a - * read from /dev/urandom. The get_random_{u32,u64,int,long}() family - * of functions may be higher performance for one-off random integers, - * because they do a bit of buffering. - * - * prandom_u32() - * ------------- - * - * For even weaker applications, see the pseudorandom generator - * prandom_u32(), prandom_max(), and prandom_bytes(). If the random - * numbers aren't security-critical at all, these are *far* cheaper. - * Useful for self-tests, random error simulation, randomized backoffs, - * and any other application where you trust that nobody is trying to - * maliciously mess with you by guessing the "random" numbers. - * - * Exported interfaces ---- input - * ============================== - * - * The current exported interfaces for gathering environmental noise - * from the devices are: - * - * void add_device_randomness(const void *buf, size_t size); - * void add_input_randomness(unsigned int type, unsigned int code, - * unsigned int value); - * void add_interrupt_randomness(int irq); - * void add_disk_randomness(struct gendisk *disk); - * void add_hwgenerator_randomness(const void *buffer, size_t count, - * size_t entropy); - * void add_bootloader_randomness(const void *buf, size_t size); - * - * add_device_randomness() is for adding data to the random pool that - * is likely to differ between two devices (or possibly even per boot). - * This would be things like MAC addresses or serial numbers, or the - * read-out of the RTC. 
This does *not* add any actual entropy to the - * pool, but it initializes the pool to different values for devices - * that might otherwise be identical and have very little entropy - * available to them (particularly common in the embedded world). - * - * add_input_randomness() uses the input layer interrupt timing, as well as - * the event type information from the hardware. - * - * add_interrupt_randomness() uses the interrupt timing as random - * inputs to the entropy pool. Using the cycle counters and the irq source - * as inputs, it feeds the randomness roughly once a second. - * - * add_disk_randomness() uses what amounts to the seek time of block - * layer request events, on a per-disk_devt basis, as input to the - * entropy pool. Note that high-speed solid state drives with very low - * seek times do not make for good sources of entropy, as their seek - * times are usually fairly consistent. - * - * All of these routines try to estimate how many bits of randomness a - * particular randomness source. They do this by keeping track of the - * first and second order deltas of the event timings. - * - * add_hwgenerator_randomness() is for true hardware RNGs, and will credit - * entropy as specified by the caller. If the entropy pool is full it will - * block until more entropy is needed. - * - * add_bootloader_randomness() is the same as add_hwgenerator_randomness() or - * add_device_randomness(), depending on whether or not the configuration - * option CONFIG_RANDOM_TRUST_BOOTLOADER is set. - * - * Ensuring unpredictability at system startup - * ============================================ - * - * When any operating system starts up, it will go through a sequence - * of actions that are fairly predictable by an adversary, especially - * if the start-up does not involve interaction with a human operator. - * This reduces the actual number of bits of unpredictability in the - * entropy pool below the value in entropy_count. 
In order to - * counteract this effect, it helps to carry information in the - * entropy pool across shut-downs and start-ups. To do this, put the - * following lines an appropriate script which is run during the boot - * sequence: - * - * echo "Initializing random number generator..." - * random_seed=/var/run/random-seed - * # Carry a random seed from start-up to start-up - * # Load and then save the whole entropy pool - * if [ -f $random_seed ]; then - * cat $random_seed >/dev/urandom - * else - * touch $random_seed - * fi - * chmod 600 $random_seed - * dd if=/dev/urandom of=$random_seed count=1 bs=512 - * - * and the following lines in an appropriate script which is run as - * the system is shutdown: - * - * # Carry a random seed from shut-down to start-up - * # Save the whole entropy pool - * echo "Saving random seed..." - * random_seed=/var/run/random-seed - * touch $random_seed - * chmod 600 $random_seed - * dd if=/dev/urandom of=$random_seed count=1 bs=512 - * - * For example, on most modern systems using the System V init - * scripts, such code fragments would be found in - * /etc/rc.d/init.d/random. On older Linux systems, the correct script - * location might be in /etc/rcb.d/rc.local or /etc/rc.d/rc.0. - * - * Effectively, these commands cause the contents of the entropy pool - * to be saved at shut-down time and reloaded into the entropy pool at - * start-up. (The 'dd' in the addition to the bootup script is to - * make sure that /etc/random-seed is different for every start-up, - * even if the system crashes without executing rc.0.) Even with - * complete knowledge of the start-up activities, predicting the state - * of the entropy pool requires knowledge of the previous history of - * the system. - * - * Configuring the /dev/random driver under Linux - * ============================================== - * - * The /dev/random driver under Linux uses minor numbers 8 and 9 of - * the /dev/mem major number (#1). 
So if your system does not have - * /dev/random and /dev/urandom created already, they can be created - * by using the commands: - * - * mknod /dev/random c 1 8 - * mknod /dev/urandom c 1 9 + * Copyright Theodore Ts'o, 1994, 1995, 1996, 1997, 1998, 1999. All rights reserved. + * + * This driver produces cryptographically secure pseudorandom data. It is divided + * into roughly six sections, each with a section header: + * + * - Initialization and readiness waiting. + * - Fast key erasure RNG, the "crng". + * - Entropy accumulation and extraction routines. + * - Entropy collection routines. + * - Userspace reader/writer interfaces. + * - Sysctl interface. + * + * The high level overview is that there is one input pool, into which + * various pieces of data are hashed. Some of that data is then "credited" as + * having a certain number of bits of entropy. When enough bits of entropy are + * available, the hash is finalized and handed as a key to a stream cipher that + * expands it indefinitely for various consumers. This key is periodically + * refreshed as the various entropy collectors, described below, add data to the + * input pool and credit it. There is currently no Fortuna-like scheduler + * involved, which can lead to malicious entropy sources causing a premature + * reseed, and the entropy estimates are, at best, conservative guesses. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -- GitLab From 58340f8e952b613e0ead0bed58b97b05bf4743c5 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 4 Feb 2022 16:15:46 +0100 Subject: [PATCH 0559/1586] random: defer fast pool mixing to worker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On PREEMPT_RT, it's problematic to take spinlocks from hard irq handlers. We can fix this by deferring to a workqueue the dumping of the fast pool into the input pool. 
We accomplish this with some careful rules on fast_pool->count: - When it's incremented to >= 64, we schedule the work. - If the top bit is set, we never schedule the work, even if >= 64. - The worker is responsible for setting it back to 0 when it's done. There are two small issues around using workqueues for this purpose that we work around. The first issue is that mix_interrupt_randomness() might be migrated to another CPU during CPU hotplug. This issue is rectified by checking that it hasn't been migrated (after disabling irqs). If it has been migrated, then we set the count to zero, so that when the CPU comes online again, it can requeue the work. As part of this, we switch to using an atomic_t, so that the increment in the irq handler doesn't wipe out the zeroing if the CPU comes back online while this worker is running. The second issue is that, though relatively minor in effect, we probably want to make sure we get a consistent view of the pool onto the stack, in case it's interrupted by an irq while reading. To do this, we don't reenable irqs until after the copy. There are only 18 instructions between the cli and sti, so this is a pretty tiny window. Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Theodore Ts'o Cc: Jonathan Neuschäfer Acked-by: Sebastian Andrzej Siewior Reviewed-by: Sultan Alsawaf Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. 
Donenfeld --- drivers/char/random.c | 63 +++++++++++++++++++++++++++++++++---------- 1 file changed, 49 insertions(+), 14 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index e0764e8407911..26cbfcaa3b783 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1178,9 +1178,10 @@ struct fast_pool { u32 pool32[4]; u64 pool64[2]; }; + struct work_struct mix; unsigned long last; + atomic_t count; u16 reg_idx; - u8 count; }; /* @@ -1230,12 +1231,49 @@ static u32 get_reg(struct fast_pool *f, struct pt_regs *regs) return *ptr; } +static void mix_interrupt_randomness(struct work_struct *work) +{ + struct fast_pool *fast_pool = container_of(work, struct fast_pool, mix); + u32 pool[4]; + + /* Check to see if we're running on the wrong CPU due to hotplug. */ + local_irq_disable(); + if (fast_pool != this_cpu_ptr(&irq_randomness)) { + local_irq_enable(); + /* + * If we are unlucky enough to have been moved to another CPU, + * during CPU hotplug while the CPU was shutdown then we set + * our count to zero atomically so that when the CPU comes + * back online, it can enqueue work again. The _release here + * pairs with the atomic_inc_return_acquire in + * add_interrupt_randomness(). + */ + atomic_set_release(&fast_pool->count, 0); + return; + } + + /* + * Copy the pool to the stack so that the mixer always has a + * consistent view, before we reenable irqs again. 
+ */ + memcpy(pool, fast_pool->pool32, sizeof(pool)); + atomic_set(&fast_pool->count, 0); + fast_pool->last = jiffies; + local_irq_enable(); + + mix_pool_bytes(pool, sizeof(pool)); + credit_entropy_bits(1); + memzero_explicit(pool, sizeof(pool)); +} + void add_interrupt_randomness(int irq) { + enum { MIX_INFLIGHT = 1U << 31 }; struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); struct pt_regs *regs = get_irq_regs(); unsigned long now = jiffies; cycles_t cycles = random_get_entropy(); + unsigned int new_count; if (cycles == 0) cycles = get_reg(fast_pool, regs); @@ -1255,12 +1293,13 @@ void add_interrupt_randomness(int irq) } fast_mix(fast_pool->pool32); - ++fast_pool->count; + /* The _acquire here pairs with the atomic_set_release in mix_interrupt_randomness(). */ + new_count = (unsigned int)atomic_inc_return_acquire(&fast_pool->count); if (unlikely(crng_init == 0)) { - if (fast_pool->count >= 64 && + if (new_count >= 64 && crng_fast_load(fast_pool->pool32, sizeof(fast_pool->pool32)) > 0) { - fast_pool->count = 0; + atomic_set(&fast_pool->count, 0); fast_pool->last = now; if (spin_trylock(&input_pool.lock)) { _mix_pool_bytes(&fast_pool->pool32, sizeof(fast_pool->pool32)); @@ -1270,20 +1309,16 @@ void add_interrupt_randomness(int irq) return; } - if ((fast_pool->count < 64) && !time_after(now, fast_pool->last + HZ)) + if (new_count & MIX_INFLIGHT) return; - if (!spin_trylock(&input_pool.lock)) + if (new_count < 64 && !time_after(now, fast_pool->last + HZ)) return; - fast_pool->last = now; - _mix_pool_bytes(&fast_pool->pool32, sizeof(fast_pool->pool32)); - spin_unlock(&input_pool.lock); - - fast_pool->count = 0; - - /* Award one bit for the contents of the fast pool. 
*/ - credit_entropy_bits(1); + if (unlikely(!fast_pool->mix.func)) + INIT_WORK(&fast_pool->mix, mix_interrupt_randomness); + atomic_or(MIX_INFLIGHT, &fast_pool->count); + queue_work_on(raw_smp_processor_id(), system_highpri_wq, &fast_pool->mix); } EXPORT_SYMBOL_GPL(add_interrupt_randomness); -- GitLab From afba0b80b977b2a8f16234f2acd982f82710ba33 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 12 Feb 2022 01:26:17 +0100 Subject: [PATCH 0560/1586] random: do not take pool spinlock at boot Since rand_initialize() is run while interrupts are still off and nothing else is running, we don't need to repeatedly take and release the pool spinlock, especially in the RDSEED loop. Reviewed-by: Eric Biggers Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 26cbfcaa3b783..0541066e2e5c1 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -978,10 +978,10 @@ int __init rand_initialize(void) rv = random_get_entropy(); arch_init = false; } - mix_pool_bytes(&rv, sizeof(rv)); + _mix_pool_bytes(&rv, sizeof(rv)); } - mix_pool_bytes(&now, sizeof(now)); - mix_pool_bytes(utsname(), sizeof(*(utsname()))); + _mix_pool_bytes(&now, sizeof(now)); + _mix_pool_bytes(utsname(), sizeof(*(utsname()))); extract_entropy(base_crng.key, sizeof(base_crng.key)); ++base_crng.generation; -- GitLab From da792c6d5f59a76c10a310c5d4c93428fd18f996 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 12 Feb 2022 23:54:09 +0100 Subject: [PATCH 0561/1586] random: unify early init crng load accounting crng_fast_load() and crng_slow_load() have different semantics: - crng_fast_load() xors and accounts with crng_init_cnt. - crng_slow_load() hashes and doesn't account. However add_hwgenerator_randomness() can afford to hash (it's called from a kthread), and it should account. 
Additionally, ones that can afford to hash don't need to take a trylock but can take a normal lock. So, we combine these into one function, crng_pre_init_inject(), which allows us to control these in a uniform way. This will make it simpler later to simplify this all down when the time comes for that. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 112 ++++++++++++++++++++++-------------------- 1 file changed, 58 insertions(+), 54 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 0541066e2e5c1..93d01f918720f 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -386,7 +386,7 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS], * For the fast path, we check whether we're ready, unlocked first, and * then re-check once locked later. In the case where we're really not * ready, we do fast key erasure with the base_crng directly, because - * this is what crng_{fast,slow}_load mutate during early init. + * this is what crng_pre_init_inject() mutates during early init. */ if (unlikely(!crng_ready())) { bool ready; @@ -437,72 +437,75 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS], } /* - * This function is for crng_init == 0 only. - * - * crng_fast_load() can be called by code in the interrupt service - * path. So we can't afford to dilly-dally. Returns the number of - * bytes processed from cp. + * This function is for crng_init == 0 only. It loads entropy directly + * into the crng's key, without going through the input pool. It is, + * generally speaking, not very safe, but we use this only at early + * boot time when it's better to have something there rather than + * nothing. + * + * There are two paths, a slow one and a fast one. The slow one + * hashes the input along with the current key. The fast one simply + * xors it in, and should only be used from interrupt context. 
+ * + * If account is set, then the crng_init_cnt counter is incremented. + * This shouldn't be set by functions like add_device_randomness(), + * where we can't trust the buffer passed to it is guaranteed to be + * unpredictable (so it might not have any entropy at all). + * + * Returns the number of bytes processed from input, which is bounded + * by CRNG_INIT_CNT_THRESH if account is true. */ -static size_t crng_fast_load(const void *cp, size_t len) +static size_t crng_pre_init_inject(const void *input, size_t len, + bool fast, bool account) { static int crng_init_cnt = 0; unsigned long flags; - const u8 *src = (const u8 *)cp; - size_t ret = 0; - if (!spin_trylock_irqsave(&base_crng.lock, flags)) - return 0; + if (fast) { + if (!spin_trylock_irqsave(&base_crng.lock, flags)) + return 0; + } else { + spin_lock_irqsave(&base_crng.lock, flags); + } + if (crng_init != 0) { spin_unlock_irqrestore(&base_crng.lock, flags); return 0; } - while (len > 0 && crng_init_cnt < CRNG_INIT_CNT_THRESH) { - base_crng.key[crng_init_cnt % sizeof(base_crng.key)] ^= *src; - src++; crng_init_cnt++; len--; ret++; - } - if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) { - ++base_crng.generation; - crng_init = 1; - } - spin_unlock_irqrestore(&base_crng.lock, flags); - if (crng_init == 1) - pr_notice("fast init done\n"); - return ret; -} -/* - * This function is for crng_init == 0 only. - * - * crng_slow_load() is called by add_device_randomness, which has two - * attributes. (1) We can't trust the buffer passed to it is - * guaranteed to be unpredictable (so it might not have any entropy at - * all), and (2) it doesn't have the performance constraints of - * crng_fast_load(). - * - * So, we simply hash the contents in with the current key. Finally, - * we do *not* advance crng_init_cnt since buffer we may get may be - * something like a fixed DMI table (for example), which might very - * well be unique to the machine, but is otherwise unvarying. 
- */ -static void crng_slow_load(const void *cp, size_t len) -{ - unsigned long flags; - struct blake2s_state hash; + if (account) + len = min_t(size_t, len, CRNG_INIT_CNT_THRESH - crng_init_cnt); - blake2s_init(&hash, sizeof(base_crng.key)); + if (fast) { + const u8 *src = input; + size_t i; - if (!spin_trylock_irqsave(&base_crng.lock, flags)) - return; - if (crng_init != 0) { - spin_unlock_irqrestore(&base_crng.lock, flags); - return; + for (i = 0; i < len; ++i) + base_crng.key[(crng_init_cnt + i) % + sizeof(base_crng.key)] ^= src[i]; + } else { + struct blake2s_state hash; + + blake2s_init(&hash, sizeof(base_crng.key)); + blake2s_update(&hash, base_crng.key, sizeof(base_crng.key)); + blake2s_update(&hash, input, len); + blake2s_final(&hash, base_crng.key); } - blake2s_update(&hash, base_crng.key, sizeof(base_crng.key)); - blake2s_update(&hash, cp, len); - blake2s_final(&hash, base_crng.key); + if (account) { + crng_init_cnt += len; + if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) { + ++base_crng.generation; + crng_init = 1; + } + } spin_unlock_irqrestore(&base_crng.lock, flags); + + if (crng_init == 1) + pr_notice("fast init done\n"); + + return len; } static void _get_random_bytes(void *buf, size_t nbytes) @@ -1018,7 +1021,7 @@ void add_device_randomness(const void *buf, size_t size) unsigned long flags; if (!crng_ready() && size) - crng_slow_load(buf, size); + crng_pre_init_inject(buf, size, false, false); spin_lock_irqsave(&input_pool.lock, flags); _mix_pool_bytes(buf, size); @@ -1135,7 +1138,7 @@ void add_hwgenerator_randomness(const void *buffer, size_t count, size_t entropy) { if (unlikely(crng_init == 0)) { - size_t ret = crng_fast_load(buffer, count); + size_t ret = crng_pre_init_inject(buffer, count, false, true); mix_pool_bytes(buffer, ret); count -= ret; buffer += ret; @@ -1298,7 +1301,8 @@ void add_interrupt_randomness(int irq) if (unlikely(crng_init == 0)) { if (new_count >= 64 && - crng_fast_load(fast_pool->pool32, sizeof(fast_pool->pool32)) > 0) { + 
crng_pre_init_inject(fast_pool->pool32, sizeof(fast_pool->pool32), + true, true) > 0) { atomic_set(&fast_pool->count, 0); fast_pool->last = now; if (spin_trylock(&input_pool.lock)) { -- GitLab From 1daf2f387652bf3a7044aea042f5023b3f6b189b Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 12 Feb 2022 23:57:38 +0100 Subject: [PATCH 0562/1586] random: check for crng_init == 0 in add_device_randomness() This has no real functional change, as crng_pre_init_inject() (and before that, crng_slow_load()) always checks for == 0, not >= 2. So correct the outer unlocked check to reflect that. Before this used crng_ready(), which was not correct. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 93d01f918720f..bca4467e540f7 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1020,7 +1020,7 @@ void add_device_randomness(const void *buf, size_t size) unsigned long time = random_get_entropy() ^ jiffies; unsigned long flags; - if (!crng_ready() && size) + if (crng_init == 0 && size) crng_pre_init_inject(buf, size, false, false); spin_lock_irqsave(&input_pool.lock, flags); -- GitLab From b777c38239fec5a528e59f55b379e31b1a187524 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 13 Feb 2022 16:17:01 +0100 Subject: [PATCH 0563/1586] random: pull add_hwgenerator_randomness() declaration into random.h add_hwgenerator_randomness() is a function implemented and documented inside of random.c. It is the way that hardware RNGs push data into it. Therefore, it should be declared in random.h. Otherwise sparse complains with: random.c:1137:6: warning: symbol 'add_hwgenerator_randomness' was not declared. Should it be static?
The alternative would be to include hw_random.h into random.c, but that wouldn't really be good for anything except slowing down compile time. Cc: Matt Mackall Cc: Theodore Ts'o Acked-by: Herbert Xu Reviewed-by: Eric Biggers Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- drivers/char/hw_random/core.c | 1 + include/linux/hw_random.h | 2 -- include/linux/random.h | 2 ++ 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index a3db27916256d..cfb085de876b7 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index 1a9fc38f8938c..aa1d4da03538b 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -60,7 +60,5 @@ extern int devm_hwrng_register(struct device *dev, struct hwrng *rng); /** Unregister a Hardware Random Number Generator driver. */ extern void hwrng_unregister(struct hwrng *rng); extern void devm_hwrng_unregister(struct device *dve, struct hwrng *rng); -/** Feed random bits into the pool. 
*/ -extern void add_hwgenerator_randomness(const void *buffer, size_t count, size_t entropy); #endif /* LINUX_HWRANDOM_H_ */ diff --git a/include/linux/random.h b/include/linux/random.h index 37e1e8c43d7ee..d7354de9351e9 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -32,6 +32,8 @@ static inline void add_latent_entropy(void) {} extern void add_input_randomness(unsigned int type, unsigned int code, unsigned int value) __latent_entropy; extern void add_interrupt_randomness(int irq) __latent_entropy; +extern void add_hwgenerator_randomness(const void *buffer, size_t count, + size_t entropy); extern void get_random_bytes(void *buf, size_t nbytes); extern int wait_for_random_bytes(void); -- GitLab From 3191dd5a1179ef0fad5a050a1702ae98b6251e8f Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 13 Feb 2022 22:48:04 +0100 Subject: [PATCH 0564/1586] random: clear fast pool, crng, and batches in cpuhp bring up For the irq randomness fast pool, rather than having to use expensive atomics, which were visibly the most expensive thing in the entire irq handler, simply take care of the extreme edge case of resetting count to zero in the cpuhp online handler, just after workqueues have been reenabled. This simplifies the code a bit and lets us use vanilla variables rather than atomics, and performance should be improved. As well, very early on when the CPU comes up, while interrupts are still disabled, we clear out the per-cpu crng and its batches, so that it always starts with fresh randomness. Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Theodore Ts'o Cc: Sultan Alsawaf Cc: Dominik Brodowski Acked-by: Sebastian Andrzej Siewior Signed-off-by: Jason A. 
Donenfeld --- drivers/char/random.c | 62 +++++++++++++++++++++++++++++--------- include/linux/cpuhotplug.h | 2 ++ include/linux/random.h | 5 +++ kernel/cpu.c | 11 +++++++ 4 files changed, 65 insertions(+), 15 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index bca4467e540f7..d73a75cbe82d6 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -698,6 +698,25 @@ u32 get_random_u32(void) } EXPORT_SYMBOL(get_random_u32); +#ifdef CONFIG_SMP +/* + * This function is called when the CPU is coming up, with entry + * CPUHP_RANDOM_PREPARE, which comes before CPUHP_WORKQUEUE_PREP. + */ +int random_prepare_cpu(unsigned int cpu) +{ + /* + * When the cpu comes back online, immediately invalidate both + * the per-cpu crng and all batches, so that we serve fresh + * randomness. + */ + per_cpu_ptr(&crngs, cpu)->generation = ULONG_MAX; + per_cpu_ptr(&batched_entropy_u32, cpu)->position = UINT_MAX; + per_cpu_ptr(&batched_entropy_u64, cpu)->position = UINT_MAX; + return 0; +} +#endif + /** * randomize_page - Generate a random, page aligned address * @start: The smallest acceptable address the caller will take. @@ -1183,7 +1202,7 @@ struct fast_pool { }; struct work_struct mix; unsigned long last; - atomic_t count; + unsigned int count; u16 reg_idx; }; @@ -1219,6 +1238,29 @@ static void fast_mix(u32 pool[4]) static DEFINE_PER_CPU(struct fast_pool, irq_randomness); +#ifdef CONFIG_SMP +/* + * This function is called when the CPU has just come online, with + * entry CPUHP_AP_RANDOM_ONLINE, just after CPUHP_AP_WORKQUEUE_ONLINE. + */ +int random_online_cpu(unsigned int cpu) +{ + /* + * During CPU shutdown and before CPU onlining, add_interrupt_ + * randomness() may schedule mix_interrupt_randomness(), and + * set the MIX_INFLIGHT flag. However, because the worker can + * be scheduled on a different CPU during this period, that + * flag will never be cleared. 
For that reason, we zero out + * the flag here, which runs just after workqueues are onlined + * for the CPU again. This also has the effect of setting the + * irq randomness count to zero so that new accumulated irqs + * are fresh. + */ + per_cpu_ptr(&irq_randomness, cpu)->count = 0; + return 0; +} +#endif + static u32 get_reg(struct fast_pool *f, struct pt_regs *regs) { u32 *ptr = (u32 *)regs; @@ -1243,15 +1285,6 @@ static void mix_interrupt_randomness(struct work_struct *work) local_irq_disable(); if (fast_pool != this_cpu_ptr(&irq_randomness)) { local_irq_enable(); - /* - * If we are unlucky enough to have been moved to another CPU, - * during CPU hotplug while the CPU was shutdown then we set - * our count to zero atomically so that when the CPU comes - * back online, it can enqueue work again. The _release here - * pairs with the atomic_inc_return_acquire in - * add_interrupt_randomness(). - */ - atomic_set_release(&fast_pool->count, 0); return; } @@ -1260,7 +1293,7 @@ static void mix_interrupt_randomness(struct work_struct *work) * consistent view, before we reenable irqs again. */ memcpy(pool, fast_pool->pool32, sizeof(pool)); - atomic_set(&fast_pool->count, 0); + fast_pool->count = 0; fast_pool->last = jiffies; local_irq_enable(); @@ -1296,14 +1329,13 @@ void add_interrupt_randomness(int irq) } fast_mix(fast_pool->pool32); - /* The _acquire here pairs with the atomic_set_release in mix_interrupt_randomness(). 
*/ - new_count = (unsigned int)atomic_inc_return_acquire(&fast_pool->count); + new_count = ++fast_pool->count; if (unlikely(crng_init == 0)) { if (new_count >= 64 && crng_pre_init_inject(fast_pool->pool32, sizeof(fast_pool->pool32), true, true) > 0) { - atomic_set(&fast_pool->count, 0); + fast_pool->count = 0; fast_pool->last = now; if (spin_trylock(&input_pool.lock)) { _mix_pool_bytes(&fast_pool->pool32, sizeof(fast_pool->pool32)); @@ -1321,7 +1353,7 @@ void add_interrupt_randomness(int irq) if (unlikely(!fast_pool->mix.func)) INIT_WORK(&fast_pool->mix, mix_interrupt_randomness); - atomic_or(MIX_INFLIGHT, &fast_pool->count); + fast_pool->count |= MIX_INFLIGHT; queue_work_on(raw_smp_processor_id(), system_highpri_wq, &fast_pool->mix); } EXPORT_SYMBOL_GPL(add_interrupt_randomness); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 411a428ace4d4..481e565cc5c42 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -100,6 +100,7 @@ enum cpuhp_state { CPUHP_AP_ARM_CACHE_B15_RAC_DEAD, CPUHP_PADATA_DEAD, CPUHP_AP_DTPM_CPU_DEAD, + CPUHP_RANDOM_PREPARE, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, @@ -240,6 +241,7 @@ enum cpuhp_state { CPUHP_AP_PERF_CSKY_ONLINE, CPUHP_AP_WATCHDOG_ONLINE, CPUHP_AP_WORKQUEUE_ONLINE, + CPUHP_AP_RANDOM_ONLINE, CPUHP_AP_RCUTREE_ONLINE, CPUHP_AP_BASE_CACHEINFO_ONLINE, CPUHP_AP_ONLINE_DYN, diff --git a/include/linux/random.h b/include/linux/random.h index d7354de9351e9..6148b8d1ccf34 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -156,4 +156,9 @@ static inline bool __init arch_get_random_long_early(unsigned long *v) } #endif +#ifdef CONFIG_SMP +extern int random_prepare_cpu(unsigned int cpu); +extern int random_online_cpu(unsigned int cpu); +#endif + #endif /* _LINUX_RANDOM_H */ diff --git a/kernel/cpu.c b/kernel/cpu.c index 407a2568f35eb..238cba15449f6 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -34,6 +34,7 @@ #include #include #include +#include 
#include #define CREATE_TRACE_POINTS @@ -1659,6 +1660,11 @@ static struct cpuhp_step cpuhp_hp_states[] = { .startup.single = perf_event_init_cpu, .teardown.single = perf_event_exit_cpu, }, + [CPUHP_RANDOM_PREPARE] = { + .name = "random:prepare", + .startup.single = random_prepare_cpu, + .teardown.single = NULL, + }, [CPUHP_WORKQUEUE_PREP] = { .name = "workqueue:prepare", .startup.single = workqueue_prepare_cpu, @@ -1782,6 +1788,11 @@ static struct cpuhp_step cpuhp_hp_states[] = { .startup.single = workqueue_online_cpu, .teardown.single = workqueue_offline_cpu, }, + [CPUHP_AP_RANDOM_ONLINE] = { + .name = "random:online", + .startup.single = random_online_cpu, + .teardown.single = NULL, + }, [CPUHP_AP_RCUTREE_ONLINE] = { .name = "RCU/tree:online", .startup.single = rcutree_online_cpu, -- GitLab From cd3bc044af483422cc81a93f23c78c20c978b17c Mon Sep 17 00:00:00 2001 From: Yael Tzur Date: Tue, 15 Feb 2022 09:19:53 -0500 Subject: [PATCH 0565/1586] KEYS: encrypted: Instantiate key with user-provided decrypted data For availability and performance reasons master keys often need to be released outside of a Key Management Service (KMS) to clients. It would be beneficial to provide a mechanism where the wrapping/unwrapping of data encryption keys (DEKs) is not dependent on a remote call at runtime yet security is not (or only minimally) compromised. Master keys could be securely stored in the Kernel and be used to wrap/unwrap keys from Userspace. The encrypted.c class supports instantiation of encrypted keys with either an already-encrypted key material, or by generating new key material based on random numbers. This patch defines a new datablob format: [] that allows to inject and encrypt user-provided decrypted data. The decrypted data must be hex-ascii encoded. 
Signed-off-by: Yael Tzur Reviewed-by: Mimi Zohar Reviewed-by: Sumit Garg Reviewed-by: Jarkko Sakkinen Signed-off-by: Mimi Zohar --- .../security/keys/trusted-encrypted.rst | 25 +++++-- security/keys/Kconfig | 19 +++-- security/keys/encrypted-keys/encrypted.c | 71 ++++++++++++++----- 3 files changed, 86 insertions(+), 29 deletions(-) diff --git a/Documentation/security/keys/trusted-encrypted.rst b/Documentation/security/keys/trusted-encrypted.rst index 80d5a5af62a1d..f614dad7de12f 100644 --- a/Documentation/security/keys/trusted-encrypted.rst +++ b/Documentation/security/keys/trusted-encrypted.rst @@ -107,12 +107,13 @@ Encrypted Keys -------------- Encrypted keys do not depend on a trust source, and are faster, as they use AES -for encryption/decryption. New keys are created from kernel-generated random -numbers, and are encrypted/decrypted using a specified ‘master’ key. The -‘master’ key can either be a trusted-key or user-key type. The main disadvantage -of encrypted keys is that if they are not rooted in a trusted key, they are only -as secure as the user key encrypting them. The master user key should therefore -be loaded in as secure a way as possible, preferably early in boot. +for encryption/decryption. New keys are created either from kernel-generated +random numbers or user-provided decrypted data, and are encrypted/decrypted +using a specified ‘master’ key. The ‘master’ key can either be a trusted-key or +user-key type. The main disadvantage of encrypted keys is that if they are not +rooted in a trusted key, they are only as secure as the user key encrypting +them. The master user key should therefore be loaded in as secure a way as +possible, preferably early in boot. 
Usage @@ -199,6 +200,8 @@ Usage:: keyctl add encrypted name "new [format] key-type:master-key-name keylen" ring + keyctl add encrypted name "new [format] key-type:master-key-name keylen + decrypted-data" ring keyctl add encrypted name "load hex_blob" ring keyctl update keyid "update key-type:master-key-name" @@ -303,6 +306,16 @@ Load an encrypted key "evm" from saved blob:: 82dbbc55be2a44616e4959430436dc4f2a7a9659aa60bb4652aeb2120f149ed197c564e0 24717c64 5972dcb82ab2dde83376d82b2e3c09ffc +Instantiate an encrypted key "evm" using user-provided decrypted data:: + + $ keyctl add encrypted evm "new default user:kmk 32 `cat evm_decrypted_data.blob`" @u + 794890253 + + $ keyctl print 794890253 + default user:kmk 32 2375725ad57798846a9bbd240de8906f006e66c03af53b1b382d + bbc55be2a44616e4959430436dc4f2a7a9659aa60bb4652aeb2120f149ed197c564e0247 + 17c64 5972dcb82ab2dde83376d82b2e3c09ffc + Other uses for trusted and encrypted keys, such as for disk and file encryption are anticipated. In particular the new format 'ecryptfs' has been defined in order to use encrypted keys to mount an eCryptfs filesystem. More details diff --git a/security/keys/Kconfig b/security/keys/Kconfig index 969122c7b92f4..0e30b361e1c1e 100644 --- a/security/keys/Kconfig +++ b/security/keys/Kconfig @@ -98,10 +98,21 @@ config ENCRYPTED_KEYS select CRYPTO_RNG help This option provides support for create/encrypting/decrypting keys - in the kernel. Encrypted keys are kernel generated random numbers, - which are encrypted/decrypted with a 'master' symmetric key. The - 'master' key can be either a trusted-key or user-key type. - Userspace only ever sees/stores encrypted blobs. + in the kernel. Encrypted keys are instantiated using kernel + generated random numbers or provided decrypted data, and are + encrypted/decrypted with a 'master' symmetric key. The 'master' + key can be either a trusted-key or user-key type. Only encrypted + blobs are ever output to Userspace. 
+ + If you are unsure as to whether this is required, answer N. + +config USER_DECRYPTED_DATA + bool "Allow encrypted keys with user decrypted data" + depends on ENCRYPTED_KEYS + help + This option provides support for instantiating encrypted keys using + user-provided decrypted data. The decrypted data must be hex-ascii + encoded. If you are unsure as to whether this is required, answer N. diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c index 87432b35d7713..e05cfc2e49aeb 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -78,6 +78,11 @@ static const match_table_t key_tokens = { {Opt_err, NULL} }; +static bool user_decrypted_data = IS_ENABLED(CONFIG_USER_DECRYPTED_DATA); +module_param(user_decrypted_data, bool, 0); +MODULE_PARM_DESC(user_decrypted_data, + "Allow instantiation of encrypted keys using provided decrypted data"); + static int aes_get_sizes(void) { struct crypto_skcipher *tfm; @@ -158,7 +163,7 @@ static int valid_master_desc(const char *new_desc, const char *orig_desc) * datablob_parse - parse the keyctl data * * datablob format: - * new [] + * new [] [] * load [] * * update @@ -170,7 +175,7 @@ static int valid_master_desc(const char *new_desc, const char *orig_desc) */ static int datablob_parse(char *datablob, const char **format, char **master_desc, char **decrypted_datalen, - char **hex_encoded_iv) + char **hex_encoded_iv, char **decrypted_data) { substring_t args[MAX_OPT_ARGS]; int ret = -EINVAL; @@ -231,6 +236,7 @@ static int datablob_parse(char *datablob, const char **format, "when called from .update method\n", keyword); break; } + *decrypted_data = strsep(&datablob, " \t"); ret = 0; break; case Opt_load: @@ -595,7 +601,8 @@ out: static struct encrypted_key_payload *encrypted_key_alloc(struct key *key, const char *format, const char *master_desc, - const char *datalen) + const char *datalen, + const char *decrypted_data) { struct 
encrypted_key_payload *epayload = NULL; unsigned short datablob_len; @@ -604,6 +611,7 @@ static struct encrypted_key_payload *encrypted_key_alloc(struct key *key, unsigned int encrypted_datalen; unsigned int format_len; long dlen; + int i; int ret; ret = kstrtol(datalen, 10, &dlen); @@ -613,6 +621,24 @@ static struct encrypted_key_payload *encrypted_key_alloc(struct key *key, format_len = (!format) ? strlen(key_format_default) : strlen(format); decrypted_datalen = dlen; payload_datalen = decrypted_datalen; + + if (decrypted_data) { + if (!user_decrypted_data) { + pr_err("encrypted key: instantiation of keys using provided decrypted data is disabled since CONFIG_USER_DECRYPTED_DATA is set to false\n"); + return ERR_PTR(-EINVAL); + } + if (strlen(decrypted_data) != decrypted_datalen) { + pr_err("encrypted key: decrypted data provided does not match decrypted data length provided\n"); + return ERR_PTR(-EINVAL); + } + for (i = 0; i < strlen(decrypted_data); i++) { + if (!isxdigit(decrypted_data[i])) { + pr_err("encrypted key: decrypted data provided must contain only hexadecimal characters\n"); + return ERR_PTR(-EINVAL); + } + } + } + if (format) { if (!strcmp(format, key_format_ecryptfs)) { if (dlen != ECRYPTFS_MAX_KEY_BYTES) { @@ -740,13 +766,14 @@ static void __ekey_init(struct encrypted_key_payload *epayload, /* * encrypted_init - initialize an encrypted key * - * For a new key, use a random number for both the iv and data - * itself. For an old key, decrypt the hex encoded data. + * For a new key, use either a random number or user-provided decrypted data in + * case it is provided. A random number is used for the iv in both cases. For + * an old key, decrypt the hex encoded data. 
*/ static int encrypted_init(struct encrypted_key_payload *epayload, const char *key_desc, const char *format, const char *master_desc, const char *datalen, - const char *hex_encoded_iv) + const char *hex_encoded_iv, const char *decrypted_data) { int ret = 0; @@ -760,21 +787,26 @@ static int encrypted_init(struct encrypted_key_payload *epayload, } __ekey_init(epayload, format, master_desc, datalen); - if (!hex_encoded_iv) { - get_random_bytes(epayload->iv, ivsize); - - get_random_bytes(epayload->decrypted_data, - epayload->decrypted_datalen); - } else + if (hex_encoded_iv) { ret = encrypted_key_decrypt(epayload, format, hex_encoded_iv); + } else if (decrypted_data) { + get_random_bytes(epayload->iv, ivsize); + memcpy(epayload->decrypted_data, decrypted_data, + epayload->decrypted_datalen); + } else { + get_random_bytes(epayload->iv, ivsize); + get_random_bytes(epayload->decrypted_data, epayload->decrypted_datalen); + } return ret; } /* * encrypted_instantiate - instantiate an encrypted key * - * Decrypt an existing encrypted datablob or create a new encrypted key - * based on a kernel random number. + * Instantiates the key: + * - by decrypting an existing encrypted datablob, or + * - by creating a new encrypted key based on a kernel random number, or + * - using provided decrypted data. * * On success, return 0. Otherwise return errno. 
*/ @@ -787,6 +819,7 @@ static int encrypted_instantiate(struct key *key, char *master_desc = NULL; char *decrypted_datalen = NULL; char *hex_encoded_iv = NULL; + char *decrypted_data = NULL; size_t datalen = prep->datalen; int ret; @@ -799,18 +832,18 @@ static int encrypted_instantiate(struct key *key, datablob[datalen] = 0; memcpy(datablob, prep->data, datalen); ret = datablob_parse(datablob, &format, &master_desc, - &decrypted_datalen, &hex_encoded_iv); + &decrypted_datalen, &hex_encoded_iv, &decrypted_data); if (ret < 0) goto out; epayload = encrypted_key_alloc(key, format, master_desc, - decrypted_datalen); + decrypted_datalen, decrypted_data); if (IS_ERR(epayload)) { ret = PTR_ERR(epayload); goto out; } ret = encrypted_init(epayload, key->description, format, master_desc, - decrypted_datalen, hex_encoded_iv); + decrypted_datalen, hex_encoded_iv, decrypted_data); if (ret < 0) { kfree_sensitive(epayload); goto out; @@ -860,7 +893,7 @@ static int encrypted_update(struct key *key, struct key_preparsed_payload *prep) buf[datalen] = 0; memcpy(buf, prep->data, datalen); - ret = datablob_parse(buf, &format, &new_master_desc, NULL, NULL); + ret = datablob_parse(buf, &format, &new_master_desc, NULL, NULL, NULL); if (ret < 0) goto out; @@ -869,7 +902,7 @@ static int encrypted_update(struct key *key, struct key_preparsed_payload *prep) goto out; new_epayload = encrypted_key_alloc(key, epayload->format, - new_master_desc, epayload->datalen); + new_master_desc, epayload->datalen, NULL); if (IS_ERR(new_epayload)) { ret = PTR_ERR(new_epayload); goto out; -- GitLab From 5741150c808b2bbeb1017609f3029daf6651b7d5 Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Tue, 1 Feb 2022 12:51:41 +0100 Subject: [PATCH 0566/1586] spi: stm32: ignore Rx queue not empty in stm32f4 Tx only mode STM32F4_SPI_SR_RXNE and STM32F4_SPI_SR_OVR are distinct bits in the same status register. ~STM32F4_SPI_SR_OVR | STM32F4_SPI_SR_RXNE is thus equal to ~STM32F4_SPI_SR_OVR. 
The original intention was likely for transmission-only transfers to ignore interrupts both for when the Rx queue has bytes (RXNE) as well as when these bytes haven't been read in time (OVR). Fix the typo by adding the missing parenthesis. Signed-off-by: Ahmad Fatoum Link: https://lore.kernel.org/r/20220201115142.3999860-1-a.fatoum@pengutronix.de Signed-off-by: Mark Brown --- drivers/spi/spi-stm32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c index 7fc24505a72cd..a6adc20f68626 100644 --- a/drivers/spi/spi-stm32.c +++ b/drivers/spi/spi-stm32.c @@ -763,7 +763,7 @@ static irqreturn_t stm32f4_spi_irq_event(int irq, void *dev_id) if (!spi->cur_usedma && (spi->cur_comm == SPI_SIMPLEX_TX || spi->cur_comm == SPI_3WIRE_TX)) { /* OVR flag shouldn't be handled for TX only mode */ - sr &= ~STM32F4_SPI_SR_OVR | STM32F4_SPI_SR_RXNE; + sr &= ~(STM32F4_SPI_SR_OVR | STM32F4_SPI_SR_RXNE); mask |= STM32F4_SPI_SR_TXE; } -- GitLab From 1847e3046c528bd85bd51e2860f4139bd9052d6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20F=C3=A4rber?= Date: Sat, 19 Feb 2022 14:15:48 +0100 Subject: [PATCH 0567/1586] spi: gpio: Implement LSB First bitbang support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for slave DT property spi-lsb-first, i.e., SPI_LSB_FIRST mode. Duplicate the inline helpers bitbang_txrx_be_cpha{0,1} as LE versions. Conditionally call them from all the spi-gpio txrx_word callbacks. Some alternatives to this implementation approach were discussed back then [0], but eventually it was considered reasonable. 
[0] https://lore.kernel.org/linux-arm-kernel/20191212033952.5967-8-afaerber@suse.de/ Signed-off-by: Andreas Färber Signed-off-by: Heiner Kallweit Tested-by: Christian Hewitt Link: https://lore.kernel.org/r/feac3377-4ad1-77d8-9a18-3588d80fb909@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-bitbang-txrx.h | 66 ++++++++++++++++++++++++++++++++++ drivers/spi/spi-gpio.c | 42 +++++++++++++++++----- 2 files changed, 99 insertions(+), 9 deletions(-) diff --git a/drivers/spi/spi-bitbang-txrx.h b/drivers/spi/spi-bitbang-txrx.h index ae61d72c7d281..267342dfa7388 100644 --- a/drivers/spi/spi-bitbang-txrx.h +++ b/drivers/spi/spi-bitbang-txrx.h @@ -41,6 +41,8 @@ * chips need ... there may be several reasons you'd need to tweak timings * in these routines, not just to make it faster or slower to match a * particular CPU clock rate. + * + * ToDo: Maybe the bitrev macros can be used to improve the code? */ static inline u32 @@ -106,3 +108,67 @@ bitbang_txrx_be_cpha1(struct spi_device *spi, } return word; } + +static inline u32 +bitbang_txrx_le_cpha0(struct spi_device *spi, + unsigned int nsecs, unsigned int cpol, unsigned int flags, + u32 word, u8 bits) +{ + /* if (cpol == 0) this is SPI_MODE_0; else this is SPI_MODE_2 */ + + u32 oldbit = !(word & 1); + /* clock starts at inactive polarity */ + for (; likely(bits); bits--) { + + /* setup LSB (to slave) on trailing edge */ + if ((flags & SPI_MASTER_NO_TX) == 0) { + if ((word & 1) != oldbit) { + setmosi(spi, word & 1); + oldbit = word & 1; + } + } + spidelay(nsecs); /* T(setup) */ + + setsck(spi, !cpol); + spidelay(nsecs); + + /* sample LSB (from slave) on leading edge */ + word >>= 1; + if ((flags & SPI_MASTER_NO_RX) == 0) + word |= getmiso(spi) << (bits - 1); + setsck(spi, cpol); + } + return word; +} + +static inline u32 +bitbang_txrx_le_cpha1(struct spi_device *spi, + unsigned int nsecs, unsigned int cpol, unsigned int flags, + u32 word, u8 bits) +{ + /* if (cpol == 0) this is SPI_MODE_1; else this is SPI_MODE_3 */ + + 
u32 oldbit = !(word & 1); + /* clock starts at inactive polarity */ + for (; likely(bits); bits--) { + + /* setup LSB (to slave) on leading edge */ + setsck(spi, !cpol); + if ((flags & SPI_MASTER_NO_TX) == 0) { + if ((word & 1) != oldbit) { + setmosi(spi, word & 1); + oldbit = word & 1; + } + } + spidelay(nsecs); /* T(setup) */ + + setsck(spi, cpol); + spidelay(nsecs); + + /* sample LSB (from slave) on trailing edge */ + word >>= 1; + if ((flags & SPI_MASTER_NO_RX) == 0) + word |= getmiso(spi) << (bits - 1); + } + return word; +} diff --git a/drivers/spi/spi-gpio.c b/drivers/spi/spi-gpio.c index 0584f4d2fde29..4b12c4964a664 100644 --- a/drivers/spi/spi-gpio.c +++ b/drivers/spi/spi-gpio.c @@ -135,25 +135,37 @@ static inline int getmiso(const struct spi_device *spi) static u32 spi_gpio_txrx_word_mode0(struct spi_device *spi, unsigned nsecs, u32 word, u8 bits, unsigned flags) { - return bitbang_txrx_be_cpha0(spi, nsecs, 0, flags, word, bits); + if (unlikely(spi->mode & SPI_LSB_FIRST)) + return bitbang_txrx_le_cpha0(spi, nsecs, 0, flags, word, bits); + else + return bitbang_txrx_be_cpha0(spi, nsecs, 0, flags, word, bits); } static u32 spi_gpio_txrx_word_mode1(struct spi_device *spi, unsigned nsecs, u32 word, u8 bits, unsigned flags) { - return bitbang_txrx_be_cpha1(spi, nsecs, 0, flags, word, bits); + if (unlikely(spi->mode & SPI_LSB_FIRST)) + return bitbang_txrx_le_cpha1(spi, nsecs, 0, flags, word, bits); + else + return bitbang_txrx_be_cpha1(spi, nsecs, 0, flags, word, bits); } static u32 spi_gpio_txrx_word_mode2(struct spi_device *spi, unsigned nsecs, u32 word, u8 bits, unsigned flags) { - return bitbang_txrx_be_cpha0(spi, nsecs, 1, flags, word, bits); + if (unlikely(spi->mode & SPI_LSB_FIRST)) + return bitbang_txrx_le_cpha0(spi, nsecs, 1, flags, word, bits); + else + return bitbang_txrx_be_cpha0(spi, nsecs, 1, flags, word, bits); } static u32 spi_gpio_txrx_word_mode3(struct spi_device *spi, unsigned nsecs, u32 word, u8 bits, unsigned flags) { - return 
bitbang_txrx_be_cpha1(spi, nsecs, 1, flags, word, bits); + if (unlikely(spi->mode & SPI_LSB_FIRST)) + return bitbang_txrx_le_cpha1(spi, nsecs, 1, flags, word, bits); + else + return bitbang_txrx_be_cpha1(spi, nsecs, 1, flags, word, bits); } /* @@ -170,28 +182,40 @@ static u32 spi_gpio_spec_txrx_word_mode0(struct spi_device *spi, unsigned nsecs, u32 word, u8 bits, unsigned flags) { flags = spi->master->flags; - return bitbang_txrx_be_cpha0(spi, nsecs, 0, flags, word, bits); + if (unlikely(spi->mode & SPI_LSB_FIRST)) + return bitbang_txrx_le_cpha0(spi, nsecs, 0, flags, word, bits); + else + return bitbang_txrx_be_cpha0(spi, nsecs, 0, flags, word, bits); } static u32 spi_gpio_spec_txrx_word_mode1(struct spi_device *spi, unsigned nsecs, u32 word, u8 bits, unsigned flags) { flags = spi->master->flags; - return bitbang_txrx_be_cpha1(spi, nsecs, 0, flags, word, bits); + if (unlikely(spi->mode & SPI_LSB_FIRST)) + return bitbang_txrx_le_cpha1(spi, nsecs, 0, flags, word, bits); + else + return bitbang_txrx_be_cpha1(spi, nsecs, 0, flags, word, bits); } static u32 spi_gpio_spec_txrx_word_mode2(struct spi_device *spi, unsigned nsecs, u32 word, u8 bits, unsigned flags) { flags = spi->master->flags; - return bitbang_txrx_be_cpha0(spi, nsecs, 1, flags, word, bits); + if (unlikely(spi->mode & SPI_LSB_FIRST)) + return bitbang_txrx_le_cpha0(spi, nsecs, 1, flags, word, bits); + else + return bitbang_txrx_be_cpha0(spi, nsecs, 1, flags, word, bits); } static u32 spi_gpio_spec_txrx_word_mode3(struct spi_device *spi, unsigned nsecs, u32 word, u8 bits, unsigned flags) { flags = spi->master->flags; - return bitbang_txrx_be_cpha1(spi, nsecs, 1, flags, word, bits); + if (unlikely(spi->mode & SPI_LSB_FIRST)) + return bitbang_txrx_le_cpha1(spi, nsecs, 1, flags, word, bits); + else + return bitbang_txrx_be_cpha1(spi, nsecs, 1, flags, word, bits); } /*----------------------------------------------------------------------*/ @@ -378,7 +402,7 @@ static int spi_gpio_probe(struct platform_device 
*pdev) master->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 32); master->mode_bits = SPI_3WIRE | SPI_3WIRE_HIZ | SPI_CPHA | SPI_CPOL | - SPI_CS_HIGH; + SPI_CS_HIGH | SPI_LSB_FIRST; if (!spi_gpio->mosi) { /* HW configuration without MOSI pin * -- GitLab From 86d7331299fda7634b11c1b7c911432679d525a5 Mon Sep 17 00:00:00 2001 From: Zhang Wensheng Date: Thu, 17 Feb 2022 14:42:47 +0800 Subject: [PATCH 0568/1586] block: update io_ticks when io hang When the inflight IOs are slow and no new IOs are issued, we expect iostat could manifest the IO hang problem. However after commit 5b18b5a73760 ("block: delete part_round_stats and switch to less precise counting"), io_ticks and time_in_queue will not be updated until the end of IO, and the avgqu-sz and %util columns of iostat will be zero. Because time_in_queue is expressed by accumulating stat.nsecs, it is not suitable to change, and %util may express the status better when an IO hang occurs. To fix io_ticks, we use update_io_ticks and inflight to update io_ticks when diskstats_show and part_stat_show are called.
Fixes: 5b18b5a73760 ("block: delete part_round_stats and switch to less precise counting") Signed-off-by: Zhang Wensheng Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220217064247.4041435-1-zhangwensheng5@huawei.com Signed-off-by: Jens Axboe --- block/genhd.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index e351fac41bf25..54f60ded2ee6f 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -919,12 +919,17 @@ ssize_t part_stat_show(struct device *dev, struct disk_stats stat; unsigned int inflight; - part_stat_read_all(bdev, &stat); if (queue_is_mq(q)) inflight = blk_mq_in_flight(q, bdev); else inflight = part_in_flight(bdev); + if (inflight) { + part_stat_lock(); + update_io_ticks(bdev, jiffies, true); + part_stat_unlock(); + } + part_stat_read_all(bdev, &stat); return sprintf(buf, "%8lu %8lu %8llu %8u " "%8lu %8lu %8llu %8u " @@ -1184,12 +1189,17 @@ static int diskstats_show(struct seq_file *seqf, void *v) xa_for_each(&gp->part_tbl, idx, hd) { if (bdev_is_partition(hd) && !bdev_nr_sectors(hd)) continue; - part_stat_read_all(hd, &stat); if (queue_is_mq(gp->queue)) inflight = blk_mq_in_flight(gp->queue, hd); else inflight = part_in_flight(hd); + if (inflight) { + part_stat_lock(); + update_io_ticks(hd, jiffies, true); + part_stat_unlock(); + } + part_stat_read_all(hd, &stat); seq_printf(seqf, "%4d %7d %pg " "%lu %lu %lu %u " "%lu %lu %lu %u " -- GitLab From e0891269a8c25715bd9510dc355326b00ab42db2 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 16 Feb 2022 16:22:26 +0000 Subject: [PATCH 0569/1586] linkage: add SYM_FUNC_ALIAS{,_LOCAL,_WEAK}() Currently aliasing an asm function requires adding START and END annotations for each name, as per Documentation/asm-annotations.rst: SYM_FUNC_START_ALIAS(__memset) SYM_FUNC_START(memset) ... asm insns ... 
SYM_FUNC_END(memset) SYM_FUNC_END_ALIAS(__memset) This is more painful than necessary to maintain, especially where a function has many aliases, some of which we may wish to define conditionally. For example, arm64's memcpy/memmove implementation (which uses some arch-specific SYM_*() helpers) has: SYM_FUNC_START_ALIAS(__memmove) SYM_FUNC_START_ALIAS_WEAK_PI(memmove) SYM_FUNC_START_ALIAS(__memcpy) SYM_FUNC_START_WEAK_PI(memcpy) ... asm insns ... SYM_FUNC_END_PI(memcpy) EXPORT_SYMBOL(memcpy) SYM_FUNC_END_ALIAS(__memcpy) EXPORT_SYMBOL(__memcpy) SYM_FUNC_END_ALIAS_PI(memmove) EXPORT_SYMBOL(memmove) SYM_FUNC_END_ALIAS(__memmove) EXPORT_SYMBOL(__memmove) SYM_FUNC_START(name) It would be much nicer if we could define the aliases *after* the standard function definition. This would avoid the need to specify each symbol name twice, and would make it easier to spot the canonical function definition. This patch adds new macros to allow us to do so, which allows the above example to be rewritten more succinctly as: SYM_FUNC_START(__pi_memcpy) ... asm insns ... SYM_FUNC_END(__pi_memcpy) SYM_FUNC_ALIAS(__memcpy, __pi_memcpy) EXPORT_SYMBOL(__memcpy) SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy) EXPORT_SYMBOL(memcpy) SYM_FUNC_ALIAS(__pi_memmove, __pi_memcpy) SYM_FUNC_ALIAS(__memmove, __pi_memmove) EXPORT_SYMBOL(__memmove) SYM_FUNC_ALIAS_WEAK(memmove, __memmove) EXPORT_SYMBOL(memmove) The reduction in duplication will also make it possible to replace some uses of WEAK with more accurate Kconfig guards, e.g. #ifndef CONFIG_KASAN SYM_FUNC_ALIAS(memmove, __memmove) EXPORT_SYMBOL(memmove) #endif ... which should make it easier to ensure that symbols are neither used nor overridden unexpectedly. The existing SYM_FUNC_START_ALIAS() and SYM_FUNC_START_LOCAL_ALIAS() are marked as deprecated, and will be removed once existing users are moved over to the new scheme. The tools/perf/ copy of linkage.h is updated to match. A subsequent patch will depend upon this when updating the x86 asm annotations.
Signed-off-by: Mark Rutland Acked-by: Ard Biesheuvel Acked-by: Josh Poimboeuf Acked-by: Mark Brown Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Jiri Slaby Cc: Peter Zijlstra Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220216162229.1076788-2-mark.rutland@arm.com Signed-off-by: Will Deacon --- Documentation/asm-annotations.rst | 16 +++++++++-- include/linux/linkage.h | 37 ++++++++++++++++++++++++- tools/perf/util/include/linux/linkage.h | 35 +++++++++++++++++++++++ 3 files changed, 85 insertions(+), 3 deletions(-) diff --git a/Documentation/asm-annotations.rst b/Documentation/asm-annotations.rst index f4bf0f6395fb9..4868b58c60fb1 100644 --- a/Documentation/asm-annotations.rst +++ b/Documentation/asm-annotations.rst @@ -130,8 +130,20 @@ denoting a range of code via ``SYM_*_START/END`` annotations. In fact, this kind of annotation corresponds to the now deprecated ``ENTRY`` and ``ENDPROC`` macros. -* ``SYM_FUNC_START_ALIAS`` and ``SYM_FUNC_START_LOCAL_ALIAS`` serve for those - who decided to have two or more names for one function. The typical use is:: +* ``SYM_FUNC_ALIAS``, ``SYM_FUNC_ALIAS_LOCAL``, and ``SYM_FUNC_ALIAS_WEAK`` can + be used to define multiple names for a function. The typical use is:: + + SYM_FUNC_START(__memset) + ... asm insns ... + SYN_FUNC_END(__memset) + SYM_FUNC_ALIAS(memset, __memset) + + In this example, one can call ``__memset`` or ``memset`` with the same + result, except the debug information for the instructions is generated to + the object file only once -- for the non-``ALIAS`` case. + +* ``SYM_FUNC_START_ALIAS`` and ``SYM_FUNC_START_LOCAL_ALIAS`` are deprecated + ways to define two or more names for one function. 
The typical use is:: SYM_FUNC_START_ALIAS(__memset) SYM_FUNC_START(memset) diff --git a/include/linux/linkage.h b/include/linux/linkage.h index dbf8506decca0..e574a84d8b11a 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -165,7 +165,18 @@ #ifndef SYM_END #define SYM_END(name, sym_type) \ .type name sym_type ASM_NL \ - .size name, .-name + .set .L__sym_size_##name, .-name ASM_NL \ + .size name, .L__sym_size_##name +#endif + +/* SYM_ALIAS -- use only if you have to */ +#ifndef SYM_ALIAS +#define SYM_ALIAS(alias, name, sym_type, linkage) \ + linkage(alias) ASM_NL \ + .set alias, name ASM_NL \ + .type alias sym_type ASM_NL \ + .set .L__sym_size_##alias, .L__sym_size_##name ASM_NL \ + .size alias, .L__sym_size_##alias #endif /* === code annotations === */ @@ -275,6 +286,30 @@ SYM_END(name, SYM_T_FUNC) #endif +/* + * SYM_FUNC_ALIAS -- define a global alias for an existing function + */ +#ifndef SYM_FUNC_ALIAS +#define SYM_FUNC_ALIAS(alias, name) \ + SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_GLOBAL) +#endif + +/* + * SYM_FUNC_ALIAS_LOCAL -- define a local alias for an existing function + */ +#ifndef SYM_FUNC_ALIAS_LOCAL +#define SYM_FUNC_ALIAS_LOCAL(alias, name) \ + SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_LOCAL) +#endif + +/* + * SYM_FUNC_ALIAS_WEAK -- define a weak global alias for an existing function + */ +#ifndef SYM_FUNC_ALIAS_WEAK +#define SYM_FUNC_ALIAS_WEAK(alias, name) \ + SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_WEAK) +#endif + /* SYM_CODE_START -- use for non-C (special) functions */ #ifndef SYM_CODE_START #define SYM_CODE_START(name) \ diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h index 5acf053fca7d4..7b4cd7947e3f2 100644 --- a/tools/perf/util/include/linux/linkage.h +++ b/tools/perf/util/include/linux/linkage.h @@ -50,9 +50,20 @@ #ifndef SYM_END #define SYM_END(name, sym_type) \ .type name sym_type ASM_NL \ + .set .L__sym_size_##name, .-name ASM_NL \ .size name, .-name #endif +/* SYM_ALIAS 
-- use only if you have to */ +#ifndef SYM_ALIAS +#define SYM_ALIAS(alias, name, sym_type, linkage) \ + linkage(alias) ASM_NL \ + .set alias, name ASM_NL \ + .type alias sym_type ASM_NL \ + .set .L__sym_size_##alias, .L__sym_size_##name ASM_NL \ + .size alias, .L__sym_size_##alias +#endif + /* * SYM_FUNC_START_ALIAS -- use where there are two global names for one * function @@ -101,4 +112,28 @@ SYM_END(name, SYM_T_FUNC) #endif +/* + * SYM_FUNC_ALIAS -- define a global alias for an existing function + */ +#ifndef SYM_FUNC_ALIAS +#define SYM_FUNC_ALIAS(alias, name) \ + SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_GLOBAL) +#endif + +/* + * SYM_FUNC_ALIAS_LOCAL -- define a local alias for an existing function + */ +#ifndef SYM_FUNC_ALIAS_LOCAL +#define SYM_FUNC_ALIAS_LOCAL(alias, name) \ + SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_LOCAL) +#endif + +/* + * SYM_FUNC_ALIAS_WEAK -- define a weak global alias for an existing function + */ +#ifndef SYM_FUNC_ALIAS_WEAK +#define SYM_FUNC_ALIAS_WEAK(alias, name) \ + SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_WEAK) +#endif + #endif /* PERF_LINUX_LINKAGE_H_ */ -- GitLab From 0f61f6be1f7f44edfab0cb731c0a2340a838956f Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 16 Feb 2022 16:22:27 +0000 Subject: [PATCH 0570/1586] arm64: clean up symbol aliasing Now that we have SYM_FUNC_ALIAS() and SYM_FUNC_ALIAS_WEAK(), use those to simplify and more consistently define function aliases across arch/arm64. Aliases are now defined in terms of a canonical function name. For position-independent functions I've made the __pi_ name the canonical name, and defined other aliases in terms of this. The SYM_FUNC_{START,END}_PI(func) macros obscure the __pi_ name, and make this hard to search for. The SYM_FUNC_START_WEAK_PI() macro also obscures the fact that the __pi_ symbol is global and the symbol is weak. For clarity, I have removed these macros and used SYM_FUNC_{START,END}() directly with the __pi_ name. For example: SYM_FUNC_START_WEAK_PI(func) ...
asm insns ... SYM_FUNC_END_PI(func) EXPORT_SYMBOL(func) ... becomes: SYM_FUNC_START(__pi_func) ... asm insns ... SYM_FUNC_END(__pi_func) SYM_FUNC_ALIAS_WEAK(func, __pi_func) EXPORT_SYMBOL(func) For clarity, where there are multiple annotations such as EXPORT_SYMBOL(), I've tried to keep annotations grouped by symbol. For example, where a function has a name and an alias which are both exported, this is organised as: SYM_FUNC_START(func) ... asm insns ... SYM_FUNC_END(func) EXPORT_SYMBOL(func) SYM_FUNC_ALIAS(alias, func) EXPORT_SYMBOL(alias) For consistency with the other string functions, I've defined strrchr as a position-independent function, as it can safely be used as such even though we have no users today. As we no longer use SYM_FUNC_{START,END}_ALIAS(), our local copies are removed. The common versions will be removed by a subsequent patch. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Acked-by: Ard Biesheuvel Acked-by: Catalin Marinas Acked-by: Josh Poimboeuf Acked-by: Mark Brown Cc: Joey Gouly Cc: Will Deacon Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220216162229.1076788-3-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/linkage.h | 24 ---------------------- arch/arm64/kvm/hyp/nvhe/cache.S | 5 +++-- arch/arm64/lib/clear_page.S | 5 +++-- arch/arm64/lib/copy_page.S | 5 +++-- arch/arm64/lib/memchr.S | 5 +++-- arch/arm64/lib/memcmp.S | 6 +++--- arch/arm64/lib/memcpy.S | 21 ++++++++++--------- arch/arm64/lib/memset.S | 12 ++++++----- arch/arm64/lib/strchr.S | 6 ++++-- arch/arm64/lib/strcmp.S | 6 +++--- arch/arm64/lib/strlen.S | 6 +++--- arch/arm64/lib/strncmp.S | 6 +++--- arch/arm64/lib/strnlen.S | 6 ++++-- arch/arm64/lib/strrchr.S | 5 +++-- arch/arm64/mm/cache.S | 35 +++++++++++++++++++------------- 15 files changed, 74 insertions(+), 79 deletions(-) diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h index b77e9b3f5371c..43f8c25b3fda6 
100644 --- a/arch/arm64/include/asm/linkage.h +++ b/arch/arm64/include/asm/linkage.h @@ -39,28 +39,4 @@ SYM_START(name, SYM_L_WEAK, SYM_A_NONE) \ bti c ; -/* - * Annotate a function as position independent, i.e., safe to be called before - * the kernel virtual mapping is activated. - */ -#define SYM_FUNC_START_PI(x) \ - SYM_FUNC_START_ALIAS(__pi_##x); \ - SYM_FUNC_START(x) - -#define SYM_FUNC_START_WEAK_PI(x) \ - SYM_FUNC_START_ALIAS(__pi_##x); \ - SYM_FUNC_START_WEAK(x) - -#define SYM_FUNC_START_WEAK_ALIAS_PI(x) \ - SYM_FUNC_START_ALIAS(__pi_##x); \ - SYM_START(x, SYM_L_WEAK, SYM_A_ALIGN) - -#define SYM_FUNC_END_PI(x) \ - SYM_FUNC_END(x); \ - SYM_FUNC_END_ALIAS(__pi_##x) - -#define SYM_FUNC_END_ALIAS_PI(x) \ - SYM_FUNC_END_ALIAS(x); \ - SYM_FUNC_END_ALIAS(__pi_##x) - #endif diff --git a/arch/arm64/kvm/hyp/nvhe/cache.S b/arch/arm64/kvm/hyp/nvhe/cache.S index 958734f4d6b0e..0c367eb5f4e28 100644 --- a/arch/arm64/kvm/hyp/nvhe/cache.S +++ b/arch/arm64/kvm/hyp/nvhe/cache.S @@ -7,7 +7,8 @@ #include #include -SYM_FUNC_START_PI(dcache_clean_inval_poc) +SYM_FUNC_START(__pi_dcache_clean_inval_poc) dcache_by_line_op civac, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(dcache_clean_inval_poc) +SYM_FUNC_END(__pi_dcache_clean_inval_poc) +SYM_FUNC_ALIAS(dcache_clean_inval_poc, __pi_dcache_clean_inval_poc) diff --git a/arch/arm64/lib/clear_page.S b/arch/arm64/lib/clear_page.S index 1fd5d790ab800..ebde40e7fa2b2 100644 --- a/arch/arm64/lib/clear_page.S +++ b/arch/arm64/lib/clear_page.S @@ -14,7 +14,7 @@ * Parameters: * x0 - dest */ -SYM_FUNC_START_PI(clear_page) +SYM_FUNC_START(__pi_clear_page) mrs x1, dczid_el0 tbnz x1, #4, 2f /* Branch if DC ZVA is prohibited */ and w1, w1, #0xf @@ -35,5 +35,6 @@ SYM_FUNC_START_PI(clear_page) tst x0, #(PAGE_SIZE - 1) b.ne 2b ret -SYM_FUNC_END_PI(clear_page) +SYM_FUNC_END(__pi_clear_page) +SYM_FUNC_ALIAS(clear_page, __pi_clear_page) EXPORT_SYMBOL(clear_page) diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S index 
29144f4cd4492..c336d2ffdec55 100644 --- a/arch/arm64/lib/copy_page.S +++ b/arch/arm64/lib/copy_page.S @@ -17,7 +17,7 @@ * x0 - dest * x1 - src */ -SYM_FUNC_START_PI(copy_page) +SYM_FUNC_START(__pi_copy_page) alternative_if ARM64_HAS_NO_HW_PREFETCH // Prefetch three cache lines ahead. prfm pldl1strm, [x1, #128] @@ -75,5 +75,6 @@ alternative_else_nop_endif stnp x16, x17, [x0, #112 - 256] ret -SYM_FUNC_END_PI(copy_page) +SYM_FUNC_END(__pi_copy_page) +SYM_FUNC_ALIAS(copy_page, __pi_copy_page) EXPORT_SYMBOL(copy_page) diff --git a/arch/arm64/lib/memchr.S b/arch/arm64/lib/memchr.S index 7c2276fdab543..37a9f2a4f7f4b 100644 --- a/arch/arm64/lib/memchr.S +++ b/arch/arm64/lib/memchr.S @@ -38,7 +38,7 @@ .p2align 4 nop -SYM_FUNC_START_WEAK_PI(memchr) +SYM_FUNC_START(__pi_memchr) and chrin, chrin, #0xff lsr wordcnt, cntin, #3 cbz wordcnt, L(byte_loop) @@ -71,5 +71,6 @@ CPU_LE( rev tmp, tmp) L(not_found): mov result, #0 ret -SYM_FUNC_END_PI(memchr) +SYM_FUNC_END(__pi_memchr) +SYM_FUNC_ALIAS_WEAK(memchr, __pi_memchr) EXPORT_SYMBOL_NOKASAN(memchr) diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S index 7d956384222ff..a5ccf2c55f911 100644 --- a/arch/arm64/lib/memcmp.S +++ b/arch/arm64/lib/memcmp.S @@ -32,7 +32,7 @@ #define tmp1 x7 #define tmp2 x8 -SYM_FUNC_START_WEAK_PI(memcmp) +SYM_FUNC_START(__pi_memcmp) subs limit, limit, 8 b.lo L(less8) @@ -134,6 +134,6 @@ L(byte_loop): b.eq L(byte_loop) sub result, data1w, data2w ret - -SYM_FUNC_END_PI(memcmp) +SYM_FUNC_END(__pi_memcmp) +SYM_FUNC_ALIAS_WEAK(memcmp, __pi_memcmp) EXPORT_SYMBOL_NOKASAN(memcmp) diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S index b82fd64ee1e1c..4ab48d49c4515 100644 --- a/arch/arm64/lib/memcpy.S +++ b/arch/arm64/lib/memcpy.S @@ -57,10 +57,7 @@ The loop tail is handled by always copying 64 bytes from the end. 
*/ -SYM_FUNC_START_ALIAS(__memmove) -SYM_FUNC_START_WEAK_ALIAS_PI(memmove) -SYM_FUNC_START_ALIAS(__memcpy) -SYM_FUNC_START_WEAK_PI(memcpy) +SYM_FUNC_START(__pi_memcpy) add srcend, src, count add dstend, dstin, count cmp count, 128 @@ -241,12 +238,16 @@ L(copy64_from_start): stp B_l, B_h, [dstin, 16] stp C_l, C_h, [dstin] ret +SYM_FUNC_END(__pi_memcpy) -SYM_FUNC_END_PI(memcpy) -EXPORT_SYMBOL(memcpy) -SYM_FUNC_END_ALIAS(__memcpy) +SYM_FUNC_ALIAS(__memcpy, __pi_memcpy) EXPORT_SYMBOL(__memcpy) -SYM_FUNC_END_ALIAS_PI(memmove) -EXPORT_SYMBOL(memmove) -SYM_FUNC_END_ALIAS(__memmove) +SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy) +EXPORT_SYMBOL(memcpy) + +SYM_FUNC_ALIAS(__pi_memmove, __pi_memcpy) + +SYM_FUNC_ALIAS(__memmove, __pi_memmove) EXPORT_SYMBOL(__memmove) +SYM_FUNC_ALIAS_WEAK(memmove, __memmove) +EXPORT_SYMBOL(memmove) diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S index a9c1c9a01ea90..a5aebe82ad73b 100644 --- a/arch/arm64/lib/memset.S +++ b/arch/arm64/lib/memset.S @@ -42,8 +42,7 @@ dst .req x8 tmp3w .req w9 tmp3 .req x9 -SYM_FUNC_START_ALIAS(__memset) -SYM_FUNC_START_WEAK_PI(memset) +SYM_FUNC_START(__pi_memset) mov dst, dstin /* Preserve return value. 
*/ and A_lw, val, #255 orr A_lw, A_lw, A_lw, lsl #8 @@ -202,7 +201,10 @@ SYM_FUNC_START_WEAK_PI(memset) ands count, count, zva_bits_x b.ne .Ltail_maybe_long ret -SYM_FUNC_END_PI(memset) -EXPORT_SYMBOL(memset) -SYM_FUNC_END_ALIAS(__memset) +SYM_FUNC_END(__pi_memset) + +SYM_FUNC_ALIAS(__memset, __pi_memset) EXPORT_SYMBOL(__memset) + +SYM_FUNC_ALIAS_WEAK(memset, __pi_memset) +EXPORT_SYMBOL(memset) diff --git a/arch/arm64/lib/strchr.S b/arch/arm64/lib/strchr.S index 1f47eae3b0d6d..94ee67a6b212c 100644 --- a/arch/arm64/lib/strchr.S +++ b/arch/arm64/lib/strchr.S @@ -18,7 +18,7 @@ * Returns: * x0 - address of first occurrence of 'c' or 0 */ -SYM_FUNC_START_WEAK(strchr) +SYM_FUNC_START(__pi_strchr) and w1, w1, #0xff 1: ldrb w2, [x0], #1 cmp w2, w1 @@ -28,5 +28,7 @@ SYM_FUNC_START_WEAK(strchr) cmp w2, w1 csel x0, x0, xzr, eq ret -SYM_FUNC_END(strchr) +SYM_FUNC_END(__pi_strchr) + +SYM_FUNC_ALIAS_WEAK(strchr, __pi_strchr) EXPORT_SYMBOL_NOKASAN(strchr) diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S index 83bcad72ec972..cda7de747efcf 100644 --- a/arch/arm64/lib/strcmp.S +++ b/arch/arm64/lib/strcmp.S @@ -41,7 +41,7 @@ /* Start of performance-critical section -- one 64B cache line. */ .align 6 -SYM_FUNC_START_WEAK_PI(strcmp) +SYM_FUNC_START(__pi_strcmp) eor tmp1, src1, src2 mov zeroones, #REP8_01 tst tmp1, #7 @@ -171,6 +171,6 @@ L(loop_misaligned): L(done): sub result, data1, data2 ret - -SYM_FUNC_END_PI(strcmp) +SYM_FUNC_END(__pi_strcmp) +SYM_FUNC_ALIAS_WEAK(strcmp, __pi_strcmp) EXPORT_SYMBOL_NOHWKASAN(strcmp) diff --git a/arch/arm64/lib/strlen.S b/arch/arm64/lib/strlen.S index 1648790e91b3c..4919fe81ae540 100644 --- a/arch/arm64/lib/strlen.S +++ b/arch/arm64/lib/strlen.S @@ -79,7 +79,7 @@ whether the first fetch, which may be misaligned, crosses a page boundary. 
*/ -SYM_FUNC_START_WEAK_PI(strlen) +SYM_FUNC_START(__pi_strlen) and tmp1, srcin, MIN_PAGE_SIZE - 1 mov zeroones, REP8_01 cmp tmp1, MIN_PAGE_SIZE - 16 @@ -208,6 +208,6 @@ L(page_cross): csel data1, data1, tmp4, eq csel data2, data2, tmp2, eq b L(page_cross_entry) - -SYM_FUNC_END_PI(strlen) +SYM_FUNC_END(__pi_strlen) +SYM_FUNC_ALIAS_WEAK(strlen, __pi_strlen) EXPORT_SYMBOL_NOKASAN(strlen) diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S index e42bcfcd37e6f..a848abcec975e 100644 --- a/arch/arm64/lib/strncmp.S +++ b/arch/arm64/lib/strncmp.S @@ -44,7 +44,7 @@ #define endloop x15 #define count mask -SYM_FUNC_START_WEAK_PI(strncmp) +SYM_FUNC_START(__pi_strncmp) cbz limit, L(ret0) eor tmp1, src1, src2 mov zeroones, #REP8_01 @@ -256,6 +256,6 @@ L(done_loop): L(ret0): mov result, #0 ret - -SYM_FUNC_END_PI(strncmp) +SYM_FUNC_END(__pi_strncmp) +SYM_FUNC_ALIAS_WEAK(strncmp, __pi_strncmp) EXPORT_SYMBOL_NOHWKASAN(strncmp) diff --git a/arch/arm64/lib/strnlen.S b/arch/arm64/lib/strnlen.S index b72913a990389..d5ac0e10a01db 100644 --- a/arch/arm64/lib/strnlen.S +++ b/arch/arm64/lib/strnlen.S @@ -47,7 +47,7 @@ limit_wd .req x14 #define REP8_7f 0x7f7f7f7f7f7f7f7f #define REP8_80 0x8080808080808080 -SYM_FUNC_START_WEAK_PI(strnlen) +SYM_FUNC_START(__pi_strnlen) cbz limit, .Lhit_limit mov zeroones, #REP8_01 bic src, srcin, #15 @@ -156,5 +156,7 @@ CPU_LE( lsr tmp2, tmp2, tmp4 ) /* Shift (tmp1 & 63). 
*/ .Lhit_limit: mov len, limit ret -SYM_FUNC_END_PI(strnlen) +SYM_FUNC_END(__pi_strnlen) + +SYM_FUNC_ALIAS_WEAK(strnlen, __pi_strnlen) EXPORT_SYMBOL_NOKASAN(strnlen) diff --git a/arch/arm64/lib/strrchr.S b/arch/arm64/lib/strrchr.S index 13132d1ed6d12..a5123cf0ce125 100644 --- a/arch/arm64/lib/strrchr.S +++ b/arch/arm64/lib/strrchr.S @@ -18,7 +18,7 @@ * Returns: * x0 - address of last occurrence of 'c' or 0 */ -SYM_FUNC_START_WEAK_PI(strrchr) +SYM_FUNC_START(__pi_strrchr) mov x3, #0 and w1, w1, #0xff 1: ldrb w2, [x0], #1 @@ -29,5 +29,6 @@ SYM_FUNC_START_WEAK_PI(strrchr) b 1b 2: mov x0, x3 ret -SYM_FUNC_END_PI(strrchr) +SYM_FUNC_END(__pi_strrchr) +SYM_FUNC_ALIAS_WEAK(strrchr, __pi_strrchr) EXPORT_SYMBOL_NOKASAN(strrchr) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 7d0563db42014..0ea6cc25dc663 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -107,10 +107,11 @@ SYM_FUNC_END(icache_inval_pou) * - start - virtual start address of region * - end - virtual end address of region */ -SYM_FUNC_START_PI(dcache_clean_inval_poc) +SYM_FUNC_START(__pi_dcache_clean_inval_poc) dcache_by_line_op civac, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(dcache_clean_inval_poc) +SYM_FUNC_END(__pi_dcache_clean_inval_poc) +SYM_FUNC_ALIAS(dcache_clean_inval_poc, __pi_dcache_clean_inval_poc) /* * dcache_clean_pou(start, end) @@ -140,7 +141,7 @@ SYM_FUNC_END(dcache_clean_pou) * - start - kernel start address of region * - end - kernel end address of region */ -SYM_FUNC_START_PI(dcache_inval_poc) +SYM_FUNC_START(__pi_dcache_inval_poc) dcache_line_size x2, x3 sub x3, x2, #1 tst x1, x3 // end cache line aligned? 
@@ -158,7 +159,8 @@ SYM_FUNC_START_PI(dcache_inval_poc) b.lo 2b dsb sy ret -SYM_FUNC_END_PI(dcache_inval_poc) +SYM_FUNC_END(__pi_dcache_inval_poc) +SYM_FUNC_ALIAS(dcache_inval_poc, __pi_dcache_inval_poc) /* * dcache_clean_poc(start, end) @@ -169,10 +171,11 @@ SYM_FUNC_END_PI(dcache_inval_poc) * - start - virtual start address of region * - end - virtual end address of region */ -SYM_FUNC_START_PI(dcache_clean_poc) +SYM_FUNC_START(__pi_dcache_clean_poc) dcache_by_line_op cvac, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(dcache_clean_poc) +SYM_FUNC_END(__pi_dcache_clean_poc) +SYM_FUNC_ALIAS(dcache_clean_poc, __pi_dcache_clean_poc) /* * dcache_clean_pop(start, end) @@ -183,13 +186,14 @@ SYM_FUNC_END_PI(dcache_clean_poc) * - start - virtual start address of region * - end - virtual end address of region */ -SYM_FUNC_START_PI(dcache_clean_pop) +SYM_FUNC_START(__pi_dcache_clean_pop) alternative_if_not ARM64_HAS_DCPOP b dcache_clean_poc alternative_else_nop_endif dcache_by_line_op cvap, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(dcache_clean_pop) +SYM_FUNC_END(__pi_dcache_clean_pop) +SYM_FUNC_ALIAS(dcache_clean_pop, __pi_dcache_clean_pop) /* * __dma_flush_area(start, size) @@ -199,11 +203,12 @@ SYM_FUNC_END_PI(dcache_clean_pop) * - start - virtual start address of region * - size - size in question */ -SYM_FUNC_START_PI(__dma_flush_area) +SYM_FUNC_START(__pi___dma_flush_area) add x1, x0, x1 dcache_by_line_op civac, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(__dma_flush_area) +SYM_FUNC_END(__pi___dma_flush_area) +SYM_FUNC_ALIAS(__dma_flush_area, __pi___dma_flush_area) /* * __dma_map_area(start, size, dir) @@ -211,12 +216,13 @@ SYM_FUNC_END_PI(__dma_flush_area) * - size - size of region * - dir - DMA direction */ -SYM_FUNC_START_PI(__dma_map_area) +SYM_FUNC_START(__pi___dma_map_area) add x1, x0, x1 cmp w2, #DMA_FROM_DEVICE b.eq __pi_dcache_inval_poc b __pi_dcache_clean_poc -SYM_FUNC_END_PI(__dma_map_area) +SYM_FUNC_END(__pi___dma_map_area) +SYM_FUNC_ALIAS(__dma_map_area, 
__pi___dma_map_area) /* * __dma_unmap_area(start, size, dir) @@ -224,9 +230,10 @@ SYM_FUNC_END_PI(__dma_map_area) * - size - size of region * - dir - DMA direction */ -SYM_FUNC_START_PI(__dma_unmap_area) +SYM_FUNC_START(__pi___dma_unmap_area) add x1, x0, x1 cmp w2, #DMA_TO_DEVICE b.ne __pi_dcache_inval_poc ret -SYM_FUNC_END_PI(__dma_unmap_area) +SYM_FUNC_END(__pi___dma_unmap_area) +SYM_FUNC_ALIAS(__dma_unmap_area, __pi___dma_unmap_area) -- GitLab From 7be2e319640c8926bbba4e004a1bee9cf6ed67b0 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 16 Feb 2022 16:22:28 +0000 Subject: [PATCH 0571/1586] x86: clean up symbol aliasing Now that we have SYM_FUNC_ALIAS() and SYM_FUNC_ALIAS_WEAK(), use those to simplify the definition of function aliases across arch/x86. For clarity, where there are multiple annotations such as EXPORT_SYMBOL(), I've tried to keep annotations grouped by symbol. For example, where a function has a name and an alias which are both exported, this is organised as: SYM_FUNC_START(func) ... asm insns ... SYM_FUNC_END(func) EXPORT_SYMBOL(func) SYM_FUNC_ALIAS(alias, func) EXPORT_SYMBOL(alias) Where there are only aliases and no exports or other annotations, I have not bothered with line spacing, e.g. SYM_FUNC_START(func) ... asm insns ... SYM_FUNC_END(func) SYM_FUNC_ALIAS(alias, func) The tools/perf/ copies of memcpy_64.S and memset_64.S are updated likewise to avoid the build system complaining these are mismatched: | Warning: Kernel ABI header at 'tools/arch/x86/lib/memcpy_64.S' differs from latest version at 'arch/x86/lib/memcpy_64.S' | diff -u tools/arch/x86/lib/memcpy_64.S arch/x86/lib/memcpy_64.S | Warning: Kernel ABI header at 'tools/arch/x86/lib/memset_64.S' differs from latest version at 'arch/x86/lib/memset_64.S' | diff -u tools/arch/x86/lib/memset_64.S arch/x86/lib/memset_64.S There should be no functional change as a result of this patch.
Signed-off-by: Mark Rutland Acked-by: Ard Biesheuvel Acked-by: Josh Poimboeuf Acked-by: Mark Brown Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Dave Hansen Cc: Ingo Molnar Cc: Jiri Slaby Cc: Peter Zijlstra Cc: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220216162229.1076788-4-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/x86/boot/compressed/head_32.S | 3 +-- arch/x86/boot/compressed/head_64.S | 3 +-- arch/x86/crypto/aesni-intel_asm.S | 4 +--- arch/x86/lib/memcpy_64.S | 10 +++++----- arch/x86/lib/memmove_64.S | 4 ++-- arch/x86/lib/memset_64.S | 6 +++--- tools/arch/x86/lib/memcpy_64.S | 10 +++++----- tools/arch/x86/lib/memset_64.S | 6 +++--- 8 files changed, 21 insertions(+), 25 deletions(-) diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 659fad53ca823..3b354eb9516df 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -152,14 +152,13 @@ SYM_FUNC_END(startup_32) #ifdef CONFIG_EFI_STUB SYM_FUNC_START(efi32_stub_entry) -SYM_FUNC_START_ALIAS(efi_stub_entry) add $0x4, %esp movl 8(%esp), %esi /* save boot_params pointer */ call efi_main /* efi_main returns the possibly relocated address of startup_32 */ jmp *%eax SYM_FUNC_END(efi32_stub_entry) -SYM_FUNC_END_ALIAS(efi_stub_entry) +SYM_FUNC_ALIAS(efi_stub_entry, efi32_stub_entry) #endif .text diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index fd9441f404570..dea95301196b8 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -535,7 +535,6 @@ SYM_CODE_END(startup_64) #ifdef CONFIG_EFI_STUB .org 0x390 SYM_FUNC_START(efi64_stub_entry) -SYM_FUNC_START_ALIAS(efi_stub_entry) and $~0xf, %rsp /* realign the stack */ movq %rdx, %rbx /* save boot_params pointer */ call efi_main @@ -543,7 +542,7 @@ SYM_FUNC_START_ALIAS(efi_stub_entry) leaq rva(startup_64)(%rax), %rax jmp *%rax SYM_FUNC_END(efi64_stub_entry) 
-SYM_FUNC_END_ALIAS(efi_stub_entry) +SYM_FUNC_ALIAS(efi_stub_entry, efi64_stub_entry) #endif .text diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 363699dd72206..837c1e0aa0217 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -1751,8 +1751,6 @@ SYM_FUNC_END(aesni_gcm_finalize) #endif - -SYM_FUNC_START_LOCAL_ALIAS(_key_expansion_128) SYM_FUNC_START_LOCAL(_key_expansion_256a) pshufd $0b11111111, %xmm1, %xmm1 shufps $0b00010000, %xmm0, %xmm4 @@ -1764,7 +1762,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_256a) add $0x10, TKEYP RET SYM_FUNC_END(_key_expansion_256a) -SYM_FUNC_END_ALIAS(_key_expansion_128) +SYM_FUNC_ALIAS_LOCAL(_key_expansion_128, _key_expansion_256a) SYM_FUNC_START_LOCAL(_key_expansion_192a) pshufd $0b01010101, %xmm1, %xmm1 diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 59cf2343f3d90..d0d7b9bc6cad3 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -27,8 +27,7 @@ * Output: * rax original destination */ -SYM_FUNC_START_ALIAS(__memcpy) -SYM_FUNC_START_WEAK(memcpy) +SYM_FUNC_START(__memcpy) ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \ "jmp memcpy_erms", X86_FEATURE_ERMS @@ -40,11 +39,12 @@ SYM_FUNC_START_WEAK(memcpy) movl %edx, %ecx rep movsb RET -SYM_FUNC_END(memcpy) -SYM_FUNC_END_ALIAS(__memcpy) -EXPORT_SYMBOL(memcpy) +SYM_FUNC_END(__memcpy) EXPORT_SYMBOL(__memcpy) +SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy) +EXPORT_SYMBOL(memcpy) + /* * memcpy_erms() - enhanced fast string memcpy. This is faster and * simpler than memcpy. Use memcpy_erms when possible. 
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 50ea390df7128..d83cba364e31d 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -24,7 +24,6 @@ * Output: * rax: dest */ -SYM_FUNC_START_WEAK(memmove) SYM_FUNC_START(__memmove) mov %rdi, %rax @@ -207,6 +206,7 @@ SYM_FUNC_START(__memmove) 13: RET SYM_FUNC_END(__memmove) -SYM_FUNC_END_ALIAS(memmove) EXPORT_SYMBOL(__memmove) + +SYM_FUNC_ALIAS_WEAK(memmove, __memmove) EXPORT_SYMBOL(memmove) diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index d624f2bc42f16..fc9ffd3ff3b21 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -17,7 +17,6 @@ * * rax original destination */ -SYM_FUNC_START_WEAK(memset) SYM_FUNC_START(__memset) /* * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended @@ -42,10 +41,11 @@ SYM_FUNC_START(__memset) movq %r9,%rax RET SYM_FUNC_END(__memset) -SYM_FUNC_END_ALIAS(memset) -EXPORT_SYMBOL(memset) EXPORT_SYMBOL(__memset) +SYM_FUNC_ALIAS_WEAK(memset, __memset) +EXPORT_SYMBOL(memset) + /* * ISO C memset - set a memory block to a byte value. This function uses * enhanced rep stosb to override the fast string function. diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S index 59cf2343f3d90..d0d7b9bc6cad3 100644 --- a/tools/arch/x86/lib/memcpy_64.S +++ b/tools/arch/x86/lib/memcpy_64.S @@ -27,8 +27,7 @@ * Output: * rax original destination */ -SYM_FUNC_START_ALIAS(__memcpy) -SYM_FUNC_START_WEAK(memcpy) +SYM_FUNC_START(__memcpy) ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \ "jmp memcpy_erms", X86_FEATURE_ERMS @@ -40,11 +39,12 @@ SYM_FUNC_START_WEAK(memcpy) movl %edx, %ecx rep movsb RET -SYM_FUNC_END(memcpy) -SYM_FUNC_END_ALIAS(__memcpy) -EXPORT_SYMBOL(memcpy) +SYM_FUNC_END(__memcpy) EXPORT_SYMBOL(__memcpy) +SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy) +EXPORT_SYMBOL(memcpy) + /* * memcpy_erms() - enhanced fast string memcpy. This is faster and * simpler than memcpy. 
Use memcpy_erms when possible. diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S index d624f2bc42f16..fc9ffd3ff3b21 100644 --- a/tools/arch/x86/lib/memset_64.S +++ b/tools/arch/x86/lib/memset_64.S @@ -17,7 +17,6 @@ * * rax original destination */ -SYM_FUNC_START_WEAK(memset) SYM_FUNC_START(__memset) /* * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended @@ -42,10 +41,11 @@ SYM_FUNC_START(__memset) movq %r9,%rax RET SYM_FUNC_END(__memset) -SYM_FUNC_END_ALIAS(memset) -EXPORT_SYMBOL(memset) EXPORT_SYMBOL(__memset) +SYM_FUNC_ALIAS_WEAK(memset, __memset) +EXPORT_SYMBOL(memset) + /* * ISO C memset - set a memory block to a byte value. This function uses * enhanced rep stosb to override the fast string function. -- GitLab From be9aea74400433e03c2a8b0260fc9ffe2495f698 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 16 Feb 2022 16:22:29 +0000 Subject: [PATCH 0572/1586] linkage: remove SYM_FUNC_{START,END}_ALIAS() Now that all aliases are defined using SYM_FUNC_ALIAS(), remove the old SYM_FUNC_{START,END}_ALIAS() macros. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Acked-by: Ard Biesheuvel Acked-by: Josh Poimboeuf Acked-by: Mark Brown Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Jiri Slaby Cc: Peter Zijlstra Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220216162229.1076788-5-mark.rutland@arm.com Signed-off-by: Will Deacon --- Documentation/asm-annotations.rst | 13 ----------- include/linux/linkage.h | 30 ------------------------- tools/perf/util/include/linux/linkage.h | 21 ----------------- 3 files changed, 64 deletions(-) diff --git a/Documentation/asm-annotations.rst b/Documentation/asm-annotations.rst index 4868b58c60fb1..a64f2ca469d45 100644 --- a/Documentation/asm-annotations.rst +++ b/Documentation/asm-annotations.rst @@ -142,19 +142,6 @@ denoting a range of code via ``SYM_*_START/END`` annotations. 
result, except the debug information for the instructions is generated to the object file only once -- for the non-``ALIAS`` case. -* ``SYM_FUNC_START_ALIAS`` and ``SYM_FUNC_START_LOCAL_ALIAS`` are deprecated - ways to define two or more names for one function. The typical use is:: - - SYM_FUNC_START_ALIAS(__memset) - SYM_FUNC_START(memset) - ... asm insns ... - SYM_FUNC_END(memset) - SYM_FUNC_END_ALIAS(__memset) - - In this example, one can call ``__memset`` or ``memset`` with the same - result, except the debug information for the instructions is generated to - the object file only once -- for the non-``ALIAS`` case. - * ``SYM_CODE_START`` and ``SYM_CODE_START_LOCAL`` should be used only in special cases -- if you know what you are doing. This is used exclusively for interrupt handlers and similar where the calling convention is not the C diff --git a/include/linux/linkage.h b/include/linux/linkage.h index e574a84d8b11a..acb1ad2356f1b 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -211,30 +211,8 @@ SYM_ENTRY(name, linkage, SYM_A_NONE) #endif -/* - * SYM_FUNC_START_LOCAL_ALIAS -- use where there are two local names for one - * function - */ -#ifndef SYM_FUNC_START_LOCAL_ALIAS -#define SYM_FUNC_START_LOCAL_ALIAS(name) \ - SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) -#endif - -/* - * SYM_FUNC_START_ALIAS -- use where there are two global names for one - * function - */ -#ifndef SYM_FUNC_START_ALIAS -#define SYM_FUNC_START_ALIAS(name) \ - SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) -#endif - /* SYM_FUNC_START -- use for global functions */ #ifndef SYM_FUNC_START -/* - * The same as SYM_FUNC_START_ALIAS, but we will need to distinguish these two - * later. 
- */ #define SYM_FUNC_START(name) \ SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) #endif @@ -247,7 +225,6 @@ /* SYM_FUNC_START_LOCAL -- use for local functions */ #ifndef SYM_FUNC_START_LOCAL -/* the same as SYM_FUNC_START_LOCAL_ALIAS, see comment near SYM_FUNC_START */ #define SYM_FUNC_START_LOCAL(name) \ SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) #endif @@ -270,18 +247,11 @@ SYM_START(name, SYM_L_WEAK, SYM_A_NONE) #endif -/* SYM_FUNC_END_ALIAS -- the end of LOCAL_ALIASed or ALIASed function */ -#ifndef SYM_FUNC_END_ALIAS -#define SYM_FUNC_END_ALIAS(name) \ - SYM_END(name, SYM_T_FUNC) -#endif - /* * SYM_FUNC_END -- the end of SYM_FUNC_START_LOCAL, SYM_FUNC_START, * SYM_FUNC_START_WEAK, ... */ #ifndef SYM_FUNC_END -/* the same as SYM_FUNC_END_ALIAS, see comment near SYM_FUNC_START */ #define SYM_FUNC_END(name) \ SYM_END(name, SYM_T_FUNC) #endif diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h index 7b4cd7947e3f2..aa0c5179836d1 100644 --- a/tools/perf/util/include/linux/linkage.h +++ b/tools/perf/util/include/linux/linkage.h @@ -64,38 +64,18 @@ .size alias, .L__sym_size_##alias #endif -/* - * SYM_FUNC_START_ALIAS -- use where there are two global names for one - * function - */ -#ifndef SYM_FUNC_START_ALIAS -#define SYM_FUNC_START_ALIAS(name) \ - SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) -#endif - /* SYM_FUNC_START -- use for global functions */ #ifndef SYM_FUNC_START -/* - * The same as SYM_FUNC_START_ALIAS, but we will need to distinguish these two - * later. 
- */ #define SYM_FUNC_START(name) \ SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) #endif /* SYM_FUNC_START_LOCAL -- use for local functions */ #ifndef SYM_FUNC_START_LOCAL -/* the same as SYM_FUNC_START_LOCAL_ALIAS, see comment near SYM_FUNC_START */ #define SYM_FUNC_START_LOCAL(name) \ SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) #endif -/* SYM_FUNC_END_ALIAS -- the end of LOCAL_ALIASed or ALIASed function */ -#ifndef SYM_FUNC_END_ALIAS -#define SYM_FUNC_END_ALIAS(name) \ - SYM_END(name, SYM_T_FUNC) -#endif - /* SYM_FUNC_START_WEAK -- use for weak functions */ #ifndef SYM_FUNC_START_WEAK #define SYM_FUNC_START_WEAK(name) \ @@ -107,7 +87,6 @@ * SYM_FUNC_START_WEAK, ... */ #ifndef SYM_FUNC_END -/* the same as SYM_FUNC_END_ALIAS, see comment near SYM_FUNC_START */ #define SYM_FUNC_END(name) \ SYM_END(name, SYM_T_FUNC) #endif -- GitLab From 272ceeaea355214b301530e262a0df8600bfca95 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Tue, 22 Feb 2022 11:44:51 -0500 Subject: [PATCH 0573/1586] audit: log AUDIT_TIME_* records only from rules AUDIT_TIME_* events are generated when there are syscall rules present that are not related to time keeping. This will produce noisy log entries that could flood the logs and hide events we really care about. Rather than immediately produce the AUDIT_TIME_* records, store the data in the context and log it at syscall exit time respecting the filter rules. Note: This eats the audit_buffer, unlike any others in show_special(). 
Please see https://bugzilla.redhat.com/show_bug.cgi?id=1991919 Fixes: 7e8eda734d30 ("ntp: Audit NTP parameters adjustment") Fixes: 2d87a0674bd6 ("timekeeping: Audit clock adjustments") Signed-off-by: Richard Guy Briggs [PM: fixed style/whitespace issues] Signed-off-by: Paul Moore --- kernel/audit.h | 4 +++ kernel/auditsc.c | 87 +++++++++++++++++++++++++++++++++++++----------- 2 files changed, 71 insertions(+), 20 deletions(-) diff --git a/kernel/audit.h b/kernel/audit.h index c4498090a5bd6..58b66543b4d57 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -201,6 +201,10 @@ struct audit_context { struct { char *name; } module; + struct { + struct audit_ntp_data ntp_data; + struct timespec64 tk_injoffset; + } time; }; int fds[2]; struct audit_proctitle proctitle; diff --git a/kernel/auditsc.c b/kernel/auditsc.c index fce5d43a933f0..0efd75e4730f2 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1340,6 +1340,53 @@ static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name) from_kuid(&init_user_ns, name->fcap.rootid)); } +static void audit_log_time(struct audit_context *context, struct audit_buffer **ab) +{ + const struct audit_ntp_data *ntp = &context->time.ntp_data; + const struct timespec64 *tk = &context->time.tk_injoffset; + static const char * const ntp_name[] = { + "offset", + "freq", + "status", + "tai", + "tick", + "adjust", + }; + int type; + + if (context->type == AUDIT_TIME_ADJNTPVAL) { + for (type = 0; type < AUDIT_NTP_NVALS; type++) { + if (ntp->vals[type].newval != ntp->vals[type].oldval) { + if (!*ab) { + *ab = audit_log_start(context, + GFP_KERNEL, + AUDIT_TIME_ADJNTPVAL); + if (!*ab) + return; + } + audit_log_format(*ab, "op=%s old=%lli new=%lli", + ntp_name[type], + ntp->vals[type].oldval, + ntp->vals[type].newval); + audit_log_end(*ab); + *ab = NULL; + } + } + } + if (tk->tv_sec != 0 || tk->tv_nsec != 0) { + if (!*ab) { + *ab = audit_log_start(context, GFP_KERNEL, + AUDIT_TIME_INJOFFSET); + if (!*ab) + return; + } + 
audit_log_format(*ab, "sec=%lli nsec=%li", + (long long)tk->tv_sec, tk->tv_nsec); + audit_log_end(*ab); + *ab = NULL; + } +} + static void show_special(struct audit_context *context, int *call_panic) { struct audit_buffer *ab; @@ -1454,6 +1501,11 @@ static void show_special(struct audit_context *context, int *call_panic) audit_log_format(ab, "(null)"); break; + case AUDIT_TIME_ADJNTPVAL: + case AUDIT_TIME_INJOFFSET: + /* this call deviates from the rest, eating the buffer */ + audit_log_time(context, &ab); + break; } audit_log_end(ab); } @@ -2849,31 +2901,26 @@ void __audit_fanotify(unsigned int response) void __audit_tk_injoffset(struct timespec64 offset) { - audit_log(audit_context(), GFP_KERNEL, AUDIT_TIME_INJOFFSET, - "sec=%lli nsec=%li", - (long long)offset.tv_sec, offset.tv_nsec); -} - -static void audit_log_ntp_val(const struct audit_ntp_data *ad, - const char *op, enum audit_ntp_type type) -{ - const struct audit_ntp_val *val = &ad->vals[type]; - - if (val->newval == val->oldval) - return; + struct audit_context *context = audit_context(); - audit_log(audit_context(), GFP_KERNEL, AUDIT_TIME_ADJNTPVAL, - "op=%s old=%lli new=%lli", op, val->oldval, val->newval); + /* only set type if not already set by NTP */ + if (!context->type) + context->type = AUDIT_TIME_INJOFFSET; + memcpy(&context->time.tk_injoffset, &offset, sizeof(offset)); } void __audit_ntp_log(const struct audit_ntp_data *ad) { - audit_log_ntp_val(ad, "offset", AUDIT_NTP_OFFSET); - audit_log_ntp_val(ad, "freq", AUDIT_NTP_FREQ); - audit_log_ntp_val(ad, "status", AUDIT_NTP_STATUS); - audit_log_ntp_val(ad, "tai", AUDIT_NTP_TAI); - audit_log_ntp_val(ad, "tick", AUDIT_NTP_TICK); - audit_log_ntp_val(ad, "adjust", AUDIT_NTP_ADJUST); + struct audit_context *context = audit_context(); + int type; + + for (type = 0; type < AUDIT_NTP_NVALS; type++) + if (ad->vals[type].newval != ad->vals[type].oldval) { + /* unconditionally set type, overwriting TK */ + context->type = AUDIT_TIME_ADJNTPVAL; + 
memcpy(&context->time.ntp_data, ad, sizeof(*ad)); + break; + } } void __audit_log_nfcfg(const char *name, u8 af, unsigned int nentries, -- GitLab From be9a2277cafd318976d59c41a7f45a934ec43b26 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 17 Feb 2022 11:23:59 +0100 Subject: [PATCH 0574/1586] fork: Redo ifdefs around task stack handling The use of ifdef CONFIG_VMAP_STACK is confusing in terms of what is actually happening and what can happen. For instance from reading free_thread_stack() it appears that in the CONFIG_VMAP_STACK case it may receive a non-NULL vm pointer but it may also be NULL in which case __free_pages() is used to free the stack. This is however not the case because in the VMAP case a non-NULL pointer is always returned here. Since it looks like this might happen, the compiler creates the correct dead code with the invocation to __free_pages() and everything around it. Twice. Add spaces between the ifdef and the identifier to recognize the ifdef level which is currently in scope. Add the current identifier as a comment behind #else and #endif. Move the code within free_thread_stack() and alloc_thread_stack_node() into the relevant ifdef blocks. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20220217102406.3697941-2-bigeasy@linutronix.de --- kernel/fork.c | 74 +++++++++++++++++++++++++++------------------------ 1 file changed, 39 insertions(+), 35 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index a024bf6254df8..f5cc10164334a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -185,7 +185,7 @@ static inline void free_task_struct(struct task_struct *tsk) */ # if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) -#ifdef CONFIG_VMAP_STACK +# ifdef CONFIG_VMAP_STACK /* * vmalloc() is a bit slow, and calling vfree() enough times will force a TLB * flush. Try to minimize the number of calls by caching stacks.
@@ -210,11 +210,9 @@ static int free_vm_stack_cache(unsigned int cpu) return 0; } -#endif static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) { -#ifdef CONFIG_VMAP_STACK void *stack; int i; @@ -258,45 +256,53 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) tsk->stack = stack; } return stack; -#else - struct page *page = alloc_pages_node(node, THREADINFO_GFP, - THREAD_SIZE_ORDER); - - if (likely(page)) { - tsk->stack = kasan_reset_tag(page_address(page)); - return tsk->stack; - } - return NULL; -#endif } -static inline void free_thread_stack(struct task_struct *tsk) +static void free_thread_stack(struct task_struct *tsk) { -#ifdef CONFIG_VMAP_STACK struct vm_struct *vm = task_stack_vm_area(tsk); + int i; - if (vm) { - int i; + for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) + memcg_kmem_uncharge_page(vm->pages[i], 0); - for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) - memcg_kmem_uncharge_page(vm->pages[i], 0); + for (i = 0; i < NR_CACHED_STACKS; i++) { + if (this_cpu_cmpxchg(cached_stacks[i], NULL, + tsk->stack_vm_area) != NULL) + continue; - for (i = 0; i < NR_CACHED_STACKS; i++) { - if (this_cpu_cmpxchg(cached_stacks[i], - NULL, tsk->stack_vm_area) != NULL) - continue; + tsk->stack = NULL; + tsk->stack_vm_area = NULL; + return; + } + vfree_atomic(tsk->stack); + tsk->stack = NULL; + tsk->stack_vm_area = NULL; +} - return; - } +# else /* !CONFIG_VMAP_STACK */ - vfree_atomic(tsk->stack); - return; +static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) +{ + struct page *page = alloc_pages_node(node, THREADINFO_GFP, + THREAD_SIZE_ORDER); + + if (likely(page)) { + tsk->stack = kasan_reset_tag(page_address(page)); + return tsk->stack; } -#endif + return NULL; +} +static void free_thread_stack(struct task_struct *tsk) +{ __free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER); + tsk->stack = NULL; } -# else + +# endif /* CONFIG_VMAP_STACK */ +# else /* !(THREAD_SIZE >= PAGE_SIZE || 
defined(CONFIG_VMAP_STACK)) */ + static struct kmem_cache *thread_stack_cache; static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, @@ -312,6 +318,7 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, static void free_thread_stack(struct task_struct *tsk) { kmem_cache_free(thread_stack_cache, tsk->stack); + tsk->stack = NULL; } void thread_stack_cache_init(void) @@ -321,8 +328,9 @@ void thread_stack_cache_init(void) THREAD_SIZE, NULL); BUG_ON(thread_stack_cache == NULL); } -# endif -#endif + +# endif /* THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) */ +#endif /* !CONFIG_ARCH_THREAD_STACK_ALLOCATOR */ /* SLAB cache for signal_struct structures (tsk->signal) */ static struct kmem_cache *signal_cachep; @@ -432,10 +440,6 @@ static void release_task_stack(struct task_struct *tsk) account_kernel_stack(tsk, -1); free_thread_stack(tsk); - tsk->stack = NULL; -#ifdef CONFIG_VMAP_STACK - tsk->stack_vm_area = NULL; -#endif } #ifdef CONFIG_THREAD_INFO_IN_TASK -- GitLab From 546c42b2c5c161619736dd730d3df709181999d0 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 17 Feb 2022 11:24:00 +0100 Subject: [PATCH 0575/1586] fork: Duplicate task_struct before stack allocation alloc_thread_stack_node() already populates the task_struct::stack member except on IA64. The stack pointer is saved and populated again because IA64 needs it and arch_dup_task_struct() overwrites it. Allocate thread's stack after task_struct has been duplicated as a preparation for further changes. 
Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20220217102406.3697941-3-bigeasy@linutronix.de --- kernel/fork.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index f5cc10164334a..30c01ce2ae57d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -888,6 +888,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) if (!tsk) return NULL; + err = arch_dup_task_struct(tsk, orig); + if (err) + goto free_tsk; + stack = alloc_thread_stack_node(tsk, node); if (!stack) goto free_tsk; @@ -897,8 +901,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) stack_vm_area = task_stack_vm_area(tsk); - err = arch_dup_task_struct(tsk, orig); - /* * arch_dup_task_struct() clobbers the stack-related fields. Make * sure they're properly initialized before using any stack-related @@ -912,9 +914,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) refcount_set(&tsk->stack_refcount, 1); #endif - if (err) - goto free_stack; - err = scs_prepare(tsk, node); if (err) goto free_stack; -- GitLab From 2bb0529c0bc0698f3baf3e88ffd61a18eef252a7 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 17 Feb 2022 11:24:01 +0100 Subject: [PATCH 0576/1586] fork, IA64: Provide alloc_thread_stack_node() for IA64 Provide a generic alloc_thread_stack_node() for IA64 and CONFIG_ARCH_THREAD_STACK_ALLOCATOR which returns stack pointer and sets task_struct::stack so it behaves exactly like the other implementations. Rename IA64's alloc_thread_stack_node() and add the generic version to the fork code so it is in one place _and_ to drastically lower the chances of fat fingering the IA64 code. Do the same for free_thread_stack(). 
Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20220217102406.3697941-4-bigeasy@linutronix.de --- arch/ia64/include/asm/thread_info.h | 6 +++--- kernel/fork.c | 17 +++++++++++++++++ 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h index 51d20cb377062..1684716f08201 100644 --- a/arch/ia64/include/asm/thread_info.h +++ b/arch/ia64/include/asm/thread_info.h @@ -55,15 +55,15 @@ struct thread_info { #ifndef ASM_OFFSETS_C /* how to get the thread information struct from C */ #define current_thread_info() ((struct thread_info *) ((char *) current + IA64_TASK_SIZE)) -#define alloc_thread_stack_node(tsk, node) \ +#define arch_alloc_thread_stack_node(tsk, node) \ ((unsigned long *) ((char *) (tsk) + IA64_TASK_SIZE)) #define task_thread_info(tsk) ((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE)) #else #define current_thread_info() ((struct thread_info *) 0) -#define alloc_thread_stack_node(tsk, node) ((unsigned long *) 0) +#define arch_alloc_thread_stack_node(tsk, node) ((unsigned long *) 0) #define task_thread_info(tsk) ((struct thread_info *) 0) #endif -#define free_thread_stack(tsk) /* nothing */ +#define arch_free_thread_stack(tsk) /* nothing */ #define task_stack_page(tsk) ((void *)(tsk)) #define __HAVE_THREAD_FUNCTIONS diff --git a/kernel/fork.c b/kernel/fork.c index 30c01ce2ae57d..7b70c47410720 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -330,6 +330,23 @@ void thread_stack_cache_init(void) } # endif /* THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) */ +#else /* CONFIG_ARCH_THREAD_STACK_ALLOCATOR */ + +static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) +{ + unsigned long *stack; + + stack = arch_alloc_thread_stack_node(tsk, node); + tsk->stack = stack; + return stack; +} + +static void free_thread_stack(struct task_struct *tsk) +{ + 
arch_free_thread_stack(tsk); + tsk->stack = NULL; +} + #endif /* !CONFIG_ARCH_THREAD_STACK_ALLOCATOR */ /* SLAB cache for signal_struct structures (tsk->signal) */ -- GitLab From 7865aba3ade4cf30f0ac08e015550084a50d9afb Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 17 Feb 2022 11:24:02 +0100 Subject: [PATCH 0577/1586] fork: Don't assign the stack pointer in dup_task_struct() All four versions of alloc_thread_stack_node() assign now task_struct::stack in case the allocation was successful. Let alloc_thread_stack_node() return an error code instead of the stack pointer and remove the stack assignment in dup_task_struct(). Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20220217102406.3697941-5-bigeasy@linutronix.de --- kernel/fork.c | 47 ++++++++++++++++------------------------------- 1 file changed, 16 insertions(+), 31 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 7b70c47410720..875bd43f02ca8 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -211,7 +211,7 @@ static int free_vm_stack_cache(unsigned int cpu) return 0; } -static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) +static int alloc_thread_stack_node(struct task_struct *tsk, int node) { void *stack; int i; @@ -232,7 +232,7 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) tsk->stack_vm_area = s; tsk->stack = s->addr; - return s->addr; + return 0; } /* @@ -245,17 +245,16 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) THREADINFO_GFP & ~__GFP_ACCOUNT, PAGE_KERNEL, 0, node, __builtin_return_address(0)); - + if (!stack) + return -ENOMEM; /* * We can't call find_vm_area() in interrupt context, and * free_thread_stack() can be called in interrupt context, * so cache the vm_struct. 
*/ - if (stack) { - tsk->stack_vm_area = find_vm_area(stack); - tsk->stack = stack; - } - return stack; + tsk->stack_vm_area = find_vm_area(stack); + tsk->stack = stack; + return 0; } static void free_thread_stack(struct task_struct *tsk) @@ -282,16 +281,16 @@ static void free_thread_stack(struct task_struct *tsk) # else /* !CONFIG_VMAP_STACK */ -static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) +static int alloc_thread_stack_node(struct task_struct *tsk, int node) { struct page *page = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER); if (likely(page)) { tsk->stack = kasan_reset_tag(page_address(page)); - return tsk->stack; + return 0; } - return NULL; + return -ENOMEM; } static void free_thread_stack(struct task_struct *tsk) @@ -305,14 +304,13 @@ static void free_thread_stack(struct task_struct *tsk) static struct kmem_cache *thread_stack_cache; -static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, - int node) +static int alloc_thread_stack_node(struct task_struct *tsk, int node) { unsigned long *stack; stack = kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node); stack = kasan_reset_tag(stack); tsk->stack = stack; - return stack; + return stack ? 0 : -ENOMEM; } static void free_thread_stack(struct task_struct *tsk) @@ -332,13 +330,13 @@ void thread_stack_cache_init(void) # endif /* THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) */ #else /* CONFIG_ARCH_THREAD_STACK_ALLOCATOR */ -static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) +static int alloc_thread_stack_node(struct task_struct *tsk, int node) { unsigned long *stack; stack = arch_alloc_thread_stack_node(tsk, node); tsk->stack = stack; - return stack; + return stack ? 
0 : -ENOMEM; } static void free_thread_stack(struct task_struct *tsk) @@ -895,8 +893,6 @@ void set_task_stack_end_magic(struct task_struct *tsk) static struct task_struct *dup_task_struct(struct task_struct *orig, int node) { struct task_struct *tsk; - unsigned long *stack; - struct vm_struct *stack_vm_area __maybe_unused; int err; if (node == NUMA_NO_NODE) @@ -909,24 +905,13 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) if (err) goto free_tsk; - stack = alloc_thread_stack_node(tsk, node); - if (!stack) + err = alloc_thread_stack_node(tsk, node); + if (err) goto free_tsk; if (memcg_charge_kernel_stack(tsk)) goto free_stack; - stack_vm_area = task_stack_vm_area(tsk); - - /* - * arch_dup_task_struct() clobbers the stack-related fields. Make - * sure they're properly initialized before using any stack-related - * functions again. - */ - tsk->stack = stack; -#ifdef CONFIG_VMAP_STACK - tsk->stack_vm_area = stack_vm_area; -#endif #ifdef CONFIG_THREAD_INFO_IN_TASK refcount_set(&tsk->stack_refcount, 1); #endif -- GitLab From f1c1a9ee00e4c53c9ccc03ec1aff4792948a25eb Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 17 Feb 2022 11:24:03 +0100 Subject: [PATCH 0578/1586] fork: Move memcg_charge_kernel_stack() into CONFIG_VMAP_STACK memcg_charge_kernel_stack() is only used in the CONFIG_VMAP_STACK case. Move memcg_charge_kernel_stack() into the CONFIG_VMAP_STACK block and invoke it from within alloc_thread_stack_node(). 
Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20220217102406.3697941-6-bigeasy@linutronix.de --- kernel/fork.c | 69 +++++++++++++++++++++++++++------------------------ 1 file changed, 36 insertions(+), 33 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 875bd43f02ca8..ac63e7fa88165 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -211,6 +211,32 @@ static int free_vm_stack_cache(unsigned int cpu) return 0; } +static int memcg_charge_kernel_stack(struct task_struct *tsk) +{ + struct vm_struct *vm = task_stack_vm_area(tsk); + int i; + int ret; + + BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0); + BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE); + + for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) { + ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL, 0); + if (ret) + goto err; + } + return 0; +err: + /* + * If memcg_kmem_charge_page() fails, page's memory cgroup pointer is + * NULL, and memcg_kmem_uncharge_page() in free_thread_stack() will + * ignore this page. + */ + for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) + memcg_kmem_uncharge_page(vm->pages[i], 0); + return ret; +} + static int alloc_thread_stack_node(struct task_struct *tsk, int node) { void *stack; @@ -230,6 +256,11 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node) /* Clear stale pointers from reused stack. 
*/ memset(s->addr, 0, THREAD_SIZE); + if (memcg_charge_kernel_stack(tsk)) { + vfree(s->addr); + return -ENOMEM; + } + tsk->stack_vm_area = s; tsk->stack = s->addr; return 0; @@ -247,6 +278,11 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node) 0, node, __builtin_return_address(0)); if (!stack) return -ENOMEM; + + if (memcg_charge_kernel_stack(tsk)) { + vfree(stack); + return -ENOMEM; + } /* * We can't call find_vm_area() in interrupt context, and * free_thread_stack() can be called in interrupt context, @@ -418,36 +454,6 @@ static void account_kernel_stack(struct task_struct *tsk, int account) } } -static int memcg_charge_kernel_stack(struct task_struct *tsk) -{ -#ifdef CONFIG_VMAP_STACK - struct vm_struct *vm = task_stack_vm_area(tsk); - int ret; - - BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0); - - if (vm) { - int i; - - BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE); - - for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) { - /* - * If memcg_kmem_charge_page() fails, page's - * memory cgroup pointer is NULL, and - * memcg_kmem_uncharge_page() in free_thread_stack() - * will ignore this page. 
- */ - ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL, - 0); - if (ret) - return ret; - } - } -#endif - return 0; -} - static void release_task_stack(struct task_struct *tsk) { if (WARN_ON(READ_ONCE(tsk->__state) != TASK_DEAD)) @@ -909,9 +915,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) if (err) goto free_tsk; - if (memcg_charge_kernel_stack(tsk)) - goto free_stack; - #ifdef CONFIG_THREAD_INFO_IN_TASK refcount_set(&tsk->stack_refcount, 1); #endif -- GitLab From 1a03d3f13ffe5dd24142d6db629e72c11b704d99 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 17 Feb 2022 11:24:04 +0100 Subject: [PATCH 0579/1586] fork: Move task stack accounting to do_exit() There is no need to perform the stack accounting of the outgoing task in its final schedule() invocation which happens with preemption disabled. The task is leaving, the resources will be freed and the accounting can happen in do_exit() before the actual schedule invocation which frees the stack memory. Move the accounting of the stack memory from release_task_stack() to exit_task_stack_account() which then can be invoked from do_exit(). 
Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20220217102406.3697941-7-bigeasy@linutronix.de --- include/linux/sched/task_stack.h | 2 ++ kernel/exit.c | 1 + kernel/fork.c | 35 +++++++++++++++++++++----------- 3 files changed, 26 insertions(+), 12 deletions(-) diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index d10150587d819..892562ebbd3aa 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -79,6 +79,8 @@ static inline void *try_get_task_stack(struct task_struct *tsk) static inline void put_task_stack(struct task_struct *tsk) {} #endif +void exit_task_stack_account(struct task_struct *tsk); + #define task_stack_end_corrupted(task) \ (*(end_of_stack(task)) != STACK_END_MAGIC) diff --git a/kernel/exit.c b/kernel/exit.c index b00a25bb4ab93..c303cffe7fdb4 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -845,6 +845,7 @@ void __noreturn do_exit(long code) put_page(tsk->task_frag.page); validate_creds_for_do_exit(tsk); + exit_task_stack_account(tsk); check_stack_usage(); preempt_disable(); diff --git a/kernel/fork.c b/kernel/fork.c index ac63e7fa88165..25828127db8dc 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -211,9 +211,8 @@ static int free_vm_stack_cache(unsigned int cpu) return 0; } -static int memcg_charge_kernel_stack(struct task_struct *tsk) +static int memcg_charge_kernel_stack(struct vm_struct *vm) { - struct vm_struct *vm = task_stack_vm_area(tsk); int i; int ret; @@ -239,6 +238,7 @@ err: static int alloc_thread_stack_node(struct task_struct *tsk, int node) { + struct vm_struct *vm; void *stack; int i; @@ -256,7 +256,7 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node) /* Clear stale pointers from reused stack. 
*/ memset(s->addr, 0, THREAD_SIZE); - if (memcg_charge_kernel_stack(tsk)) { + if (memcg_charge_kernel_stack(s)) { vfree(s->addr); return -ENOMEM; } @@ -279,7 +279,8 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node) if (!stack) return -ENOMEM; - if (memcg_charge_kernel_stack(tsk)) { + vm = find_vm_area(stack); + if (memcg_charge_kernel_stack(vm)) { vfree(stack); return -ENOMEM; } @@ -288,19 +289,15 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node) * free_thread_stack() can be called in interrupt context, * so cache the vm_struct. */ - tsk->stack_vm_area = find_vm_area(stack); + tsk->stack_vm_area = vm; tsk->stack = stack; return 0; } static void free_thread_stack(struct task_struct *tsk) { - struct vm_struct *vm = task_stack_vm_area(tsk); int i; - for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) - memcg_kmem_uncharge_page(vm->pages[i], 0); - for (i = 0; i < NR_CACHED_STACKS; i++) { if (this_cpu_cmpxchg(cached_stacks[i], NULL, tsk->stack_vm_area) != NULL) @@ -454,12 +451,25 @@ static void account_kernel_stack(struct task_struct *tsk, int account) } } +void exit_task_stack_account(struct task_struct *tsk) +{ + account_kernel_stack(tsk, -1); + + if (IS_ENABLED(CONFIG_VMAP_STACK)) { + struct vm_struct *vm; + int i; + + vm = task_stack_vm_area(tsk); + for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) + memcg_kmem_uncharge_page(vm->pages[i], 0); + } +} + static void release_task_stack(struct task_struct *tsk) { if (WARN_ON(READ_ONCE(tsk->__state) != TASK_DEAD)) return; /* Better to leak the stack than to free prematurely */ - account_kernel_stack(tsk, -1); free_thread_stack(tsk); } @@ -918,6 +928,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) #ifdef CONFIG_THREAD_INFO_IN_TASK refcount_set(&tsk->stack_refcount, 1); #endif + account_kernel_stack(tsk, 1); err = scs_prepare(tsk, node); if (err) @@ -961,8 +972,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) 
tsk->wake_q.next = NULL; tsk->worker_private = NULL; - account_kernel_stack(tsk, 1); - kcov_task_init(tsk); kmap_local_fork(tsk); @@ -981,6 +990,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) return tsk; free_stack: + exit_task_stack_account(tsk); free_thread_stack(tsk); free_tsk: free_task_struct(tsk); @@ -2459,6 +2469,7 @@ bad_fork_cleanup_count: exit_creds(p); bad_fork_free: WRITE_ONCE(p->__state, TASK_DEAD); + exit_task_stack_account(p); put_task_stack(p); delayed_free_task(p); fork_out: -- GitLab From e540bf3162e822d7a1f07e69e3bb1b4f925ca368 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 17 Feb 2022 11:24:05 +0100 Subject: [PATCH 0580/1586] fork: Only cache the VMAP stack in finish_task_switch() The task stack could be deallocated later, but for fork()/exec() kind of workloads (say a shell script executing several commands) it is important that the stack is released in finish_task_switch() so that in VMAP_STACK case it can be cached and reused in the new task. For PREEMPT_RT it would be good if the wake-up in vfree_atomic() could be avoided in the scheduling path. Far worse are the other free_thread_stack() implementations which invoke __free_pages()/ kmem_cache_free() with disabled preemption. Cache the stack in free_thread_stack() in the VMAP_STACK case and RCU-delay the free path otherwise. Free the stack in the RCU callback. In the VMAP_STACK case this is another opportunity to fill the cache. 
Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20220217102406.3697941-8-bigeasy@linutronix.de --- kernel/fork.c | 76 ++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 63 insertions(+), 13 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 25828127db8dc..177bc64078cd5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -193,6 +193,41 @@ static inline void free_task_struct(struct task_struct *tsk) #define NR_CACHED_STACKS 2 static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]); +struct vm_stack { + struct rcu_head rcu; + struct vm_struct *stack_vm_area; +}; + +static bool try_release_thread_stack_to_cache(struct vm_struct *vm) +{ + unsigned int i; + + for (i = 0; i < NR_CACHED_STACKS; i++) { + if (this_cpu_cmpxchg(cached_stacks[i], NULL, vm) != NULL) + continue; + return true; + } + return false; +} + +static void thread_stack_free_rcu(struct rcu_head *rh) +{ + struct vm_stack *vm_stack = container_of(rh, struct vm_stack, rcu); + + if (try_release_thread_stack_to_cache(vm_stack->stack_vm_area)) + return; + + vfree(vm_stack); +} + +static void thread_stack_delayed_free(struct task_struct *tsk) +{ + struct vm_stack *vm_stack = tsk->stack; + + vm_stack->stack_vm_area = tsk->stack_vm_area; + call_rcu(&vm_stack->rcu, thread_stack_free_rcu); +} + static int free_vm_stack_cache(unsigned int cpu) { struct vm_struct **cached_vm_stacks = per_cpu_ptr(cached_stacks, cpu); @@ -296,24 +331,27 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node) static void free_thread_stack(struct task_struct *tsk) { - int i; + if (!try_release_thread_stack_to_cache(tsk->stack_vm_area)) + thread_stack_delayed_free(tsk); - for (i = 0; i < NR_CACHED_STACKS; i++) { - if (this_cpu_cmpxchg(cached_stacks[i], NULL, - tsk->stack_vm_area) != NULL) - continue; - - tsk->stack = NULL; - tsk->stack_vm_area = NULL; - return; - } - vfree_atomic(tsk->stack); 
tsk->stack = NULL; tsk->stack_vm_area = NULL; } # else /* !CONFIG_VMAP_STACK */ +static void thread_stack_free_rcu(struct rcu_head *rh) +{ + __free_pages(virt_to_page(rh), THREAD_SIZE_ORDER); +} + +static void thread_stack_delayed_free(struct task_struct *tsk) +{ + struct rcu_head *rh = tsk->stack; + + call_rcu(rh, thread_stack_free_rcu); +} + static int alloc_thread_stack_node(struct task_struct *tsk, int node) { struct page *page = alloc_pages_node(node, THREADINFO_GFP, @@ -328,7 +366,7 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node) static void free_thread_stack(struct task_struct *tsk) { - __free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER); + thread_stack_delayed_free(tsk); tsk->stack = NULL; } @@ -337,6 +375,18 @@ static void free_thread_stack(struct task_struct *tsk) static struct kmem_cache *thread_stack_cache; +static void thread_stack_free_rcu(struct rcu_head *rh) +{ + kmem_cache_free(thread_stack_cache, rh); +} + +static void thread_stack_delayed_free(struct task_struct *tsk) +{ + struct rcu_head *rh = tsk->stack; + + call_rcu(rh, thread_stack_free_rcu); +} + static int alloc_thread_stack_node(struct task_struct *tsk, int node) { unsigned long *stack; @@ -348,7 +398,7 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node) static void free_thread_stack(struct task_struct *tsk) { - kmem_cache_free(thread_stack_cache, tsk->stack); + thread_stack_delayed_free(tsk); tsk->stack = NULL; } -- GitLab From 0ce055f85335e48bc571114d61a70ae217039362 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 17 Feb 2022 11:24:06 +0100 Subject: [PATCH 0581/1586] fork: Use IS_ENABLED() in account_kernel_stack() Not strictly needed but checking CONFIG_VMAP_STACK instead of task_stack_vm_area()'s result allows the compiler to remove the else path in the CONFIG_VMAP_STACK case where the pointer can't be NULL. Check for CONFIG_VMAP_STACK in order to use the proper path.
Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20220217102406.3697941-9-bigeasy@linutronix.de --- kernel/fork.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 177bc64078cd5..1279b57c4ad9e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -485,16 +485,16 @@ void vm_area_free(struct vm_area_struct *vma) static void account_kernel_stack(struct task_struct *tsk, int account) { - void *stack = task_stack_page(tsk); - struct vm_struct *vm = task_stack_vm_area(tsk); - - if (vm) { + if (IS_ENABLED(CONFIG_VMAP_STACK)) { + struct vm_struct *vm = task_stack_vm_area(tsk); int i; for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) mod_lruvec_page_state(vm->pages[i], NR_KERNEL_STACK_KB, account * (PAGE_SIZE / 1024)); } else { + void *stack = task_stack_page(tsk); + /* All stack pages are in the same node. */ mod_lruvec_kmem_state(stack, NR_KERNEL_STACK_KB, account * (THREAD_SIZE / 1024)); -- GitLab From 97e58e395e9c074fd096dad13c54e9f4112cf71d Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Thu, 17 Feb 2022 15:22:29 +0800 Subject: [PATCH 0582/1586] arm64: move AARCH64_BREAK_FAULT into insn-def.h If CONFIG_ARM64_LSE_ATOMICS is off, encoders for LSE-related instructions can return AARCH64_BREAK_FAULT directly in insn.h. In order to access AARCH64_BREAK_FAULT in insn.h, we can not include debug-monitors.h in insn.h, because debug-monitors.h already depends on insn.h, so just move AARCH64_BREAK_FAULT into insn-def.h. It will be used by the following patch to eliminate unnecessary LSE-related encoders when CONFIG_ARM64_LSE_ATOMICS is off.
Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20220217072232.1186625-2-houtao1@huawei.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/debug-monitors.h | 12 ------------ arch/arm64/include/asm/insn-def.h | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 657c921fd784a..00c291067e57d 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -34,18 +34,6 @@ */ #define BREAK_INSTR_SIZE AARCH64_INSN_SIZE -/* - * BRK instruction encoding - * The #imm16 value should be placed at bits[20:5] within BRK ins - */ -#define AARCH64_BREAK_MON 0xd4200000 - -/* - * BRK instruction for provoking a fault on purpose - * Unlike kgdb, #imm16 value with unallocated handler is used for faulting. - */ -#define AARCH64_BREAK_FAULT (AARCH64_BREAK_MON | (FAULT_BRK_IMM << 5)) - #define AARCH64_BREAK_KGDB_DYN_DBG \ (AARCH64_BREAK_MON | (KGDB_DYN_DBG_BRK_IMM << 5)) diff --git a/arch/arm64/include/asm/insn-def.h b/arch/arm64/include/asm/insn-def.h index 2c075f615c6ac..1a7d0d483698e 100644 --- a/arch/arm64/include/asm/insn-def.h +++ b/arch/arm64/include/asm/insn-def.h @@ -3,7 +3,21 @@ #ifndef __ASM_INSN_DEF_H #define __ASM_INSN_DEF_H +#include + /* A64 instructions are always 32 bits. */ #define AARCH64_INSN_SIZE 4 +/* + * BRK instruction encoding + * The #imm16 value should be placed at bits[20:5] within BRK ins + */ +#define AARCH64_BREAK_MON 0xd4200000 + +/* + * BRK instruction for provoking a fault on purpose + * Unlike kgdb, #imm16 value with unallocated handler is used for faulting. 
+ */ +#define AARCH64_BREAK_FAULT (AARCH64_BREAK_MON | (FAULT_BRK_IMM << 5)) + #endif /* __ASM_INSN_DEF_H */ -- GitLab From fa1114d9eba5087ba5e81aab4c56f546995e6cd3 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Thu, 17 Feb 2022 15:22:30 +0800 Subject: [PATCH 0583/1586] arm64: insn: add encoders for atomic operations It is a preparation patch for eBPF atomic supports under arm64. eBPF needs support atomic[64]_fetch_add, atomic[64]_[fetch_]{and,or,xor} and atomic[64]_{xchg|cmpxchg}. The ordering semantics of eBPF atomics are the same with the implementations in linux kernel. Add three helpers to support LDCLR/LDEOR/LDSET/SWP, CAS and DMB instructions. STADD/STCLR/STEOR/STSET are simply encoded as aliases for LDADD/LDCLR/LDEOR/LDSET with XZR as the destination register, so no extra helper is added. atomic_fetch_add() and other atomic ops needs support for STLXR instruction, so extend enum aarch64_insn_ldst_type to do that. LDADD/LDEOR/LDSET/SWP and CAS instructions are only available when LSE atomics is enabled, so just return AARCH64_BREAK_FAULT directly in these newly-added helpers if CONFIG_ARM64_LSE_ATOMICS is disabled. 
Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20220217072232.1186625-3-houtao1@huawei.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/insn.h | 80 +++++++++++++-- arch/arm64/lib/insn.c | 185 +++++++++++++++++++++++++++++++--- arch/arm64/net/bpf_jit.h | 11 +- 3 files changed, 253 insertions(+), 23 deletions(-) diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 6b776c8667b20..0b6b31307e684 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -205,7 +205,9 @@ enum aarch64_insn_ldst_type { AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX, AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX, AARCH64_INSN_LDST_LOAD_EX, + AARCH64_INSN_LDST_LOAD_ACQ_EX, AARCH64_INSN_LDST_STORE_EX, + AARCH64_INSN_LDST_STORE_REL_EX, }; enum aarch64_insn_adsb_type { @@ -280,6 +282,36 @@ enum aarch64_insn_adr_type { AARCH64_INSN_ADR_TYPE_ADR, }; +enum aarch64_insn_mem_atomic_op { + AARCH64_INSN_MEM_ATOMIC_ADD, + AARCH64_INSN_MEM_ATOMIC_CLR, + AARCH64_INSN_MEM_ATOMIC_EOR, + AARCH64_INSN_MEM_ATOMIC_SET, + AARCH64_INSN_MEM_ATOMIC_SWP, +}; + +enum aarch64_insn_mem_order_type { + AARCH64_INSN_MEM_ORDER_NONE, + AARCH64_INSN_MEM_ORDER_ACQ, + AARCH64_INSN_MEM_ORDER_REL, + AARCH64_INSN_MEM_ORDER_ACQREL, +}; + +enum aarch64_insn_mb_type { + AARCH64_INSN_MB_SY, + AARCH64_INSN_MB_ST, + AARCH64_INSN_MB_LD, + AARCH64_INSN_MB_ISH, + AARCH64_INSN_MB_ISHST, + AARCH64_INSN_MB_ISHLD, + AARCH64_INSN_MB_NSH, + AARCH64_INSN_MB_NSHST, + AARCH64_INSN_MB_NSHLD, + AARCH64_INSN_MB_OSH, + AARCH64_INSN_MB_OSHST, + AARCH64_INSN_MB_OSHLD, +}; + #define __AARCH64_INSN_FUNCS(abbr, mask, val) \ static __always_inline bool aarch64_insn_is_##abbr(u32 code) \ { \ @@ -303,6 +335,11 @@ __AARCH64_INSN_FUNCS(store_post, 0x3FE00C00, 0x38000400) __AARCH64_INSN_FUNCS(load_post, 0x3FE00C00, 0x38400400) __AARCH64_INSN_FUNCS(str_reg, 0x3FE0EC00, 0x38206800) __AARCH64_INSN_FUNCS(ldadd, 0x3F20FC00, 0x38200000) +__AARCH64_INSN_FUNCS(ldclr, 0x3F20FC00, 0x38201000) 
+__AARCH64_INSN_FUNCS(ldeor, 0x3F20FC00, 0x38202000) +__AARCH64_INSN_FUNCS(ldset, 0x3F20FC00, 0x38203000) +__AARCH64_INSN_FUNCS(swp, 0x3F20FC00, 0x38208000) +__AARCH64_INSN_FUNCS(cas, 0x3FA07C00, 0x08A07C00) __AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800) __AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000) __AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000) @@ -474,13 +511,6 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg, enum aarch64_insn_register state, enum aarch64_insn_size_type size, enum aarch64_insn_ldst_type type); -u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result, - enum aarch64_insn_register address, - enum aarch64_insn_register value, - enum aarch64_insn_size_type size); -u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address, - enum aarch64_insn_register value, - enum aarch64_insn_size_type size); u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst, enum aarch64_insn_register src, int imm, enum aarch64_insn_variant variant, @@ -541,6 +571,42 @@ u32 aarch64_insn_gen_prefetch(enum aarch64_insn_register base, enum aarch64_insn_prfm_type type, enum aarch64_insn_prfm_target target, enum aarch64_insn_prfm_policy policy); +#ifdef CONFIG_ARM64_LSE_ATOMICS +u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result, + enum aarch64_insn_register address, + enum aarch64_insn_register value, + enum aarch64_insn_size_type size, + enum aarch64_insn_mem_atomic_op op, + enum aarch64_insn_mem_order_type order); +u32 aarch64_insn_gen_cas(enum aarch64_insn_register result, + enum aarch64_insn_register address, + enum aarch64_insn_register value, + enum aarch64_insn_size_type size, + enum aarch64_insn_mem_order_type order); +#else +static inline +u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result, + enum aarch64_insn_register address, + enum aarch64_insn_register value, + enum aarch64_insn_size_type size, + enum aarch64_insn_mem_atomic_op op, + enum aarch64_insn_mem_order_type order) 
+{ + return AARCH64_BREAK_FAULT; +} + +static inline +u32 aarch64_insn_gen_cas(enum aarch64_insn_register result, + enum aarch64_insn_register address, + enum aarch64_insn_register value, + enum aarch64_insn_size_type size, + enum aarch64_insn_mem_order_type order) +{ + return AARCH64_BREAK_FAULT; +} +#endif +u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type); + s32 aarch64_get_branch_offset(u32 insn); u32 aarch64_set_branch_offset(u32 insn, s32 offset); diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c index e485cd735261c..5e90887deec4a 100644 --- a/arch/arm64/lib/insn.c +++ b/arch/arm64/lib/insn.c @@ -578,10 +578,16 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg, switch (type) { case AARCH64_INSN_LDST_LOAD_EX: + case AARCH64_INSN_LDST_LOAD_ACQ_EX: insn = aarch64_insn_get_load_ex_value(); + if (type == AARCH64_INSN_LDST_LOAD_ACQ_EX) + insn |= BIT(15); break; case AARCH64_INSN_LDST_STORE_EX: + case AARCH64_INSN_LDST_STORE_REL_EX: insn = aarch64_insn_get_store_ex_value(); + if (type == AARCH64_INSN_LDST_STORE_REL_EX) + insn |= BIT(15); break; default: pr_err("%s: unknown load/store exclusive encoding %d\n", __func__, type); @@ -603,12 +609,65 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg, state); } -u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result, - enum aarch64_insn_register address, - enum aarch64_insn_register value, - enum aarch64_insn_size_type size) +#ifdef CONFIG_ARM64_LSE_ATOMICS +static u32 aarch64_insn_encode_ldst_order(enum aarch64_insn_mem_order_type type, + u32 insn) { - u32 insn = aarch64_insn_get_ldadd_value(); + u32 order; + + switch (type) { + case AARCH64_INSN_MEM_ORDER_NONE: + order = 0; + break; + case AARCH64_INSN_MEM_ORDER_ACQ: + order = 2; + break; + case AARCH64_INSN_MEM_ORDER_REL: + order = 1; + break; + case AARCH64_INSN_MEM_ORDER_ACQREL: + order = 3; + break; + default: + pr_err("%s: unknown mem order %d\n", __func__, type); + return AARCH64_BREAK_FAULT; + } + + insn 
&= ~GENMASK(23, 22); + insn |= order << 22; + + return insn; +} + +u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result, + enum aarch64_insn_register address, + enum aarch64_insn_register value, + enum aarch64_insn_size_type size, + enum aarch64_insn_mem_atomic_op op, + enum aarch64_insn_mem_order_type order) +{ + u32 insn; + + switch (op) { + case AARCH64_INSN_MEM_ATOMIC_ADD: + insn = aarch64_insn_get_ldadd_value(); + break; + case AARCH64_INSN_MEM_ATOMIC_CLR: + insn = aarch64_insn_get_ldclr_value(); + break; + case AARCH64_INSN_MEM_ATOMIC_EOR: + insn = aarch64_insn_get_ldeor_value(); + break; + case AARCH64_INSN_MEM_ATOMIC_SET: + insn = aarch64_insn_get_ldset_value(); + break; + case AARCH64_INSN_MEM_ATOMIC_SWP: + insn = aarch64_insn_get_swp_value(); + break; + default: + pr_err("%s: unimplemented mem atomic op %d\n", __func__, op); + return AARCH64_BREAK_FAULT; + } switch (size) { case AARCH64_INSN_SIZE_32: @@ -621,6 +680,8 @@ u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result, insn = aarch64_insn_encode_ldst_size(size, insn); + insn = aarch64_insn_encode_ldst_order(order, insn); + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn, result); @@ -631,17 +692,68 @@ u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result, value); } -u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address, - enum aarch64_insn_register value, - enum aarch64_insn_size_type size) +static u32 aarch64_insn_encode_cas_order(enum aarch64_insn_mem_order_type type, + u32 insn) { - /* - * STADD is simply encoded as an alias for LDADD with XZR as - * the destination register. 
- */ - return aarch64_insn_gen_ldadd(AARCH64_INSN_REG_ZR, address, - value, size); + u32 order; + + switch (type) { + case AARCH64_INSN_MEM_ORDER_NONE: + order = 0; + break; + case AARCH64_INSN_MEM_ORDER_ACQ: + order = BIT(22); + break; + case AARCH64_INSN_MEM_ORDER_REL: + order = BIT(15); + break; + case AARCH64_INSN_MEM_ORDER_ACQREL: + order = BIT(15) | BIT(22); + break; + default: + pr_err("%s: unknown mem order %d\n", __func__, type); + return AARCH64_BREAK_FAULT; + } + + insn &= ~(BIT(15) | BIT(22)); + insn |= order; + + return insn; +} + +u32 aarch64_insn_gen_cas(enum aarch64_insn_register result, + enum aarch64_insn_register address, + enum aarch64_insn_register value, + enum aarch64_insn_size_type size, + enum aarch64_insn_mem_order_type order) +{ + u32 insn; + + switch (size) { + case AARCH64_INSN_SIZE_32: + case AARCH64_INSN_SIZE_64: + break; + default: + pr_err("%s: unimplemented size encoding %d\n", __func__, size); + return AARCH64_BREAK_FAULT; + } + + insn = aarch64_insn_get_cas_value(); + + insn = aarch64_insn_encode_ldst_size(size, insn); + + insn = aarch64_insn_encode_cas_order(order, insn); + + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn, + result); + + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, + address); + + return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RS, insn, + value); } +#endif static u32 aarch64_insn_encode_prfm_imm(enum aarch64_insn_prfm_type type, enum aarch64_insn_prfm_target target, @@ -1456,3 +1568,48 @@ u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant, insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, Rn); return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, Rm); } + +u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type) +{ + u32 opt; + u32 insn; + + switch (type) { + case AARCH64_INSN_MB_SY: + opt = 0xf; + break; + case AARCH64_INSN_MB_ST: + opt = 0xe; + break; + case AARCH64_INSN_MB_LD: + opt = 0xd; + break; + case 
AARCH64_INSN_MB_ISH: + opt = 0xb; + break; + case AARCH64_INSN_MB_ISHST: + opt = 0xa; + break; + case AARCH64_INSN_MB_ISHLD: + opt = 0x9; + break; + case AARCH64_INSN_MB_NSH: + opt = 0x7; + break; + case AARCH64_INSN_MB_NSHST: + opt = 0x6; + break; + case AARCH64_INSN_MB_NSHLD: + opt = 0x5; + break; + default: + pr_err("%s: unknown dmb type %d\n", __func__, type); + return AARCH64_BREAK_FAULT; + } + + insn = aarch64_insn_get_dmb_value(); + insn &= ~GENMASK(11, 8); + insn |= (opt << 8); + + return insn; +} diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index cc0cf0f5c7c3b..9d9250c7cc729 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -89,9 +89,16 @@ #define A64_STXR(sf, Rt, Rn, Rs) \ A64_LSX(sf, Rt, Rn, Rs, STORE_EX) -/* LSE atomics */ +/* + * LSE atomics + * + * STADD is simply encoded as an alias for LDADD with XZR as + * the destination register. + */ #define A64_STADD(sf, Rn, Rs) \ - aarch64_insn_gen_stadd(Rn, Rs, A64_SIZE(sf)) + aarch64_insn_gen_atomic_ld_op(A64_ZR, Rn, Rs, \ + A64_SIZE(sf), AARCH64_INSN_MEM_ATOMIC_ADD, \ + AARCH64_INSN_MEM_ORDER_NONE) /* Add/subtract (immediate) */ #define A64_ADDSUB_IMM(sf, Rd, Rn, imm12, type) \ -- GitLab From f9b5e46f4097eb298f68e5b02f70697a90a44739 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Fri, 18 Feb 2022 17:29:44 -0800 Subject: [PATCH 0584/1586] kasan: split kasan_*enabled() functions into a separate header In an upcoming commit we are going to need to call kasan_hw_tags_enabled() from arch/arm64/include/asm/mte.h. This would create a circular dependency between headers if KASAN_GENERIC or KASAN_SW_TAGS is enabled: linux/kasan.h -> linux/pgtable.h -> asm/pgtable.h -> asm/mte.h -> linux/kasan.h. Break the cycle by introducing a new header linux/kasan-enabled.h with the kasan_*enabled() functions that can be included from asm/mte.h. 
Link: https://linux-review.googlesource.com/id/I5b0d96c6ed0026fc790899e14d42b2fac6ab568e Signed-off-by: Peter Collingbourne Reviewed-by: Andrey Konovalov Link: https://lore.kernel.org/r/20220219012945.894950-1-pcc@google.com Signed-off-by: Will Deacon --- include/linux/kasan-enabled.h | 33 +++++++++++++++++++++++++++++++++ include/linux/kasan.h | 23 +---------------------- 2 files changed, 34 insertions(+), 22 deletions(-) create mode 100644 include/linux/kasan-enabled.h diff --git a/include/linux/kasan-enabled.h b/include/linux/kasan-enabled.h new file mode 100644 index 0000000000000..4b6615375022f --- /dev/null +++ b/include/linux/kasan-enabled.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_KASAN_ENABLED_H +#define _LINUX_KASAN_ENABLED_H + +#ifdef CONFIG_KASAN_HW_TAGS + +DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled); + +static __always_inline bool kasan_enabled(void) +{ + return static_branch_likely(&kasan_flag_enabled); +} + +static inline bool kasan_hw_tags_enabled(void) +{ + return kasan_enabled(); +} + +#else /* CONFIG_KASAN_HW_TAGS */ + +static inline bool kasan_enabled(void) +{ + return IS_ENABLED(CONFIG_KASAN); +} + +static inline bool kasan_hw_tags_enabled(void) +{ + return false; +} + +#endif /* CONFIG_KASAN_HW_TAGS */ + +#endif /* LINUX_KASAN_ENABLED_H */ diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 4a45562d88937..b6a93261c92a0 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -3,6 +3,7 @@ #define _LINUX_KASAN_H #include +#include #include #include #include @@ -83,33 +84,11 @@ static inline void kasan_disable_current(void) {} #ifdef CONFIG_KASAN_HW_TAGS -DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled); - -static __always_inline bool kasan_enabled(void) -{ - return static_branch_likely(&kasan_flag_enabled); -} - -static inline bool kasan_hw_tags_enabled(void) -{ - return kasan_enabled(); -} - void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags); void kasan_free_pages(struct 
page *page, unsigned int order); #else /* CONFIG_KASAN_HW_TAGS */ -static inline bool kasan_enabled(void) -{ - return IS_ENABLED(CONFIG_KASAN); -} - -static inline bool kasan_hw_tags_enabled(void) -{ - return false; -} - static __always_inline void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags) { -- GitLab From 38ddf7dafaeaf3fcdea65b3b4dfb06b4bcd9cc15 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Fri, 18 Feb 2022 17:29:45 -0800 Subject: [PATCH 0585/1586] arm64: mte: avoid clearing PSTATE.TCO on entry unless necessary On some microarchitectures, clearing PSTATE.TCO is expensive. Clearing TCO is only necessary if in-kernel MTE is enabled, or if MTE is enabled in the userspace process in synchronous (or, soon, asymmetric) mode, because we do not report uaccess faults to userspace in none or asynchronous modes. Therefore, adjust the kernel entry code to clear TCO only if necessary. Because it is now possible to switch to a task in which TCO needs to be clear from a task in which TCO is set, we also need to do the same thing on task switch. 
Signed-off-by: Peter Collingbourne Link: https://linux-review.googlesource.com/id/I52d82a580bd0500d420be501af2c35fa8c90729e Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220219012945.894950-2-pcc@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/mte.h | 22 ++++++++++++++++++++++ arch/arm64/kernel/entry-common.c | 3 +++ arch/arm64/kernel/entry.S | 7 ------- arch/arm64/kernel/mte.c | 3 +++ 4 files changed, 28 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index 075539f5f1c88..adcb937342f14 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -11,7 +11,9 @@ #ifndef __ASSEMBLY__ #include +#include #include +#include #include #include @@ -86,6 +88,26 @@ static inline int mte_ptrace_copy_tags(struct task_struct *child, #endif /* CONFIG_ARM64_MTE */ +static inline void mte_disable_tco_entry(struct task_struct *task) +{ + if (!system_supports_mte()) + return; + + /* + * Re-enable tag checking (TCO set on exception entry). This is only + * necessary if MTE is enabled in either the kernel or the userspace + * task in synchronous or asymmetric mode (SCTLR_EL1.TCF0 bit 0 is set + * for both). With MTE disabled in the kernel and disabled or + * asynchronous in userspace, tag check faults (including in uaccesses) + * are not reported, therefore there is no need to re-enable checking. + * This is beneficial on microarchitectures where re-enabling TCO is + * expensive. + */ + if (kasan_hw_tags_enabled() || + (task->thread.sctlr_user & (1UL << SCTLR_EL1_TCF0_SHIFT))) + asm volatile(SET_PSTATE_TCO(0)); +} + #ifdef CONFIG_KASAN_HW_TAGS /* Whether the MTE asynchronous mode is enabled. 
*/ DECLARE_STATIC_KEY_FALSE(mte_async_or_asymm_mode); diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index ef7fcefb96bd1..7093b578e3250 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include @@ -56,6 +57,7 @@ static void noinstr enter_from_kernel_mode(struct pt_regs *regs) { __enter_from_kernel_mode(regs); mte_check_tfsr_entry(); + mte_disable_tco_entry(current); } /* @@ -103,6 +105,7 @@ static __always_inline void __enter_from_user_mode(void) CT_WARN_ON(ct_state() != CONTEXT_USER); user_exit_irqoff(); trace_hardirqs_off_finish(); + mte_disable_tco_entry(current); } static __always_inline void enter_from_user_mode(struct pt_regs *regs) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 772ec2ecf4888..e1013a83d4f01 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -308,13 +308,6 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING msr_s SYS_ICC_PMR_EL1, x20 alternative_else_nop_endif - /* Re-enable tag checking (TCO set on exception entry) */ -#ifdef CONFIG_ARM64_MTE -alternative_if ARM64_MTE - SET_PSTATE_TCO(0) -alternative_else_nop_endif -#endif - /* * Registers that may be useful after this macro is invoked: * diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index f418ebc65f950..f983795b5eda2 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -253,6 +253,9 @@ void mte_thread_switch(struct task_struct *next) mte_update_sctlr_user(next); mte_update_gcr_excl(next); + /* TCO may not have been disabled on exception entry for the current task. */ + mte_disable_tco_entry(next); + /* * Check if an async tag exception occurred at EL1. 
* -- GitLab From a8a733b20109fc85a5b2e0318cef036b2c818ac3 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 17 Feb 2022 10:22:37 +0530 Subject: [PATCH 0586/1586] arm64/hugetlb: Define __hugetlb_valid_size() arch_hugetlb_valid_size() can be just factored out to create another helper to be used in arch_hugetlb_migration_supported() as well. This just defines __hugetlb_valid_size() for that purpose. Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Catalin Marinas Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/1645073557-6150-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/mm/hugetlbpage.c | 38 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index ffb9c229610ab..a33aba91ad891 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -56,25 +56,34 @@ void __init arm64_hugetlb_cma_reserve(void) } #endif /* CONFIG_CMA */ -#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION -bool arch_hugetlb_migration_supported(struct hstate *h) +static bool __hugetlb_valid_size(unsigned long size) { - size_t pagesize = huge_page_size(h); - - switch (pagesize) { + switch (size) { #ifndef __PAGETABLE_PMD_FOLDED case PUD_SIZE: return pud_sect_supported(); #endif - case PMD_SIZE: case CONT_PMD_SIZE: + case PMD_SIZE: case CONT_PTE_SIZE: return true; } - pr_warn("%s: unrecognized huge page size 0x%lx\n", - __func__, pagesize); + return false; } + +#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION +bool arch_hugetlb_migration_supported(struct hstate *h) +{ + size_t pagesize = huge_page_size(h); + + if (!__hugetlb_valid_size(pagesize)) { + pr_warn("%s: unrecognized huge page size 0x%lx\n", + __func__, pagesize); + return false; + } + return true; +} #endif int pmd_huge(pmd_t pmd) @@ -506,16 +515,5 @@ arch_initcall(hugetlbpage_init); bool 
__init arch_hugetlb_valid_size(unsigned long size) { - switch (size) { -#ifndef __PAGETABLE_PMD_FOLDED - case PUD_SIZE: - return pud_sect_supported(); -#endif - case CONT_PMD_SIZE: - case PMD_SIZE: - case CONT_PTE_SIZE: - return true; - } - - return false; + return __hugetlb_valid_size(size); } -- GitLab From f2544f5e6c691679d56bb38637d2f347075b36fa Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 22 Feb 2022 13:45:18 -0800 Subject: [PATCH 0587/1586] EVM: fix the evm= __setup handler return value __setup() handlers should return 1 if the parameter is handled. Returning 0 causes the entire string to be added to init's environment strings (limited to 32 strings), unnecessarily polluting it. Using the documented string "evm=fix" causes an Unknown parameter message: Unknown kernel command line parameters "BOOT_IMAGE=/boot/bzImage-517rc5 evm=fix", will be passed to user space. and that string is added to init's environment string space: Run /sbin/init as init process with arguments: /sbin/init with environment: HOME=/ TERM=linux BOOT_IMAGE=/boot/bzImage-517rc5 evm=fix With this change, using "evm=fix" acts as expected and an invalid option ("evm=evm") causes a warning to be printed: evm: invalid "evm" mode but init's environment is not polluted with this string, as expected. 
Fixes: 7102ebcd65c1 ("evm: permit only valid security.evm xattrs to be updated") Signed-off-by: Randy Dunlap Reported-by: Igor Zhbanov Link: lore.kernel.org/r/64644a2f-4a20-bab3-1e15-3b2cdd0defe3@omprussia.ru Signed-off-by: Mimi Zohar --- security/integrity/evm/evm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c index 08f907382c618..7d87772f0ce68 100644 --- a/security/integrity/evm/evm_main.c +++ b/security/integrity/evm/evm_main.c @@ -86,7 +86,7 @@ static int __init evm_set_fixmode(char *str) else pr_err("invalid \"%s\" mode", str); - return 0; + return 1; } __setup("evm=", evm_set_fixmode); -- GitLab From c8be60c12041145e663249af261286d402b4c5e3 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Tue, 22 Feb 2022 23:34:17 +0800 Subject: [PATCH 0588/1586] cpupower: Add AMD P-State capability flag Add AMD P-State capability flag in cpupower to indicate AMD new P-State kernel module support on Ryzen processors. 
Reviewed-by: Shuah Khan Signed-off-by: Huang Rui Signed-off-by: Shuah Khan --- tools/power/cpupower/utils/helpers/helpers.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h index 33ffacee7fcb9..b4813efdfb009 100644 --- a/tools/power/cpupower/utils/helpers/helpers.h +++ b/tools/power/cpupower/utils/helpers/helpers.h @@ -73,6 +73,7 @@ enum cpupower_cpu_vendor {X86_VENDOR_UNKNOWN = 0, X86_VENDOR_INTEL, #define CPUPOWER_CAP_AMD_HW_PSTATE 0x00000100 #define CPUPOWER_CAP_AMD_PSTATEDEF 0x00000200 #define CPUPOWER_CAP_AMD_CPB_MSR 0x00000400 +#define CPUPOWER_CAP_AMD_PSTATE 0x00000800 #define CPUPOWER_AMD_CPBDIS 0x02000000 -- GitLab From 46c273a0958274f1e1e69f3540ae827a92e0660f Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Tue, 22 Feb 2022 23:34:18 +0800 Subject: [PATCH 0589/1586] cpupower: Add the function to check AMD P-State enabled The processor with AMD P-State function also supports legacy ACPI hardware P-States feature as well. Once driver sets AMD P-State enabled, the processor will respond the finer grain AMD P-State feature instead of legacy ACPI P-States. So it introduces the cpupower_amd_pstate_enabled() to check whether the current kernel enables AMD P-State or AMD CPUFreq module.
Reviewed-by: Shuah Khan Signed-off-by: Huang Rui Signed-off-by: Shuah Khan --- tools/power/cpupower/utils/helpers/helpers.h | 10 ++++++++++ tools/power/cpupower/utils/helpers/misc.c | 18 ++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h index b4813efdfb009..62771a0868712 100644 --- a/tools/power/cpupower/utils/helpers/helpers.h +++ b/tools/power/cpupower/utils/helpers/helpers.h @@ -11,6 +11,7 @@ #include #include +#include #include "helpers/bitmask.h" #include @@ -136,6 +137,12 @@ extern int decode_pstates(unsigned int cpu, int boost_states, extern int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active, int * states); + +/* AMD P-State stuff **************************/ +bool cpupower_amd_pstate_enabled(void); + +/* AMD P-State stuff **************************/ + /* * CPUID functions returning a single datum */ @@ -168,6 +175,9 @@ static inline int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active, int * states) { return -1; } +static inline bool cpupower_amd_pstate_enabled(void) +{ return false; } + /* cpuid and cpuinfo helpers **************************/ static inline unsigned int cpuid_eax(unsigned int op) { return 0; }; diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c index fc6e345117216..0c483cdefcc26 100644 --- a/tools/power/cpupower/utils/helpers/misc.c +++ b/tools/power/cpupower/utils/helpers/misc.c @@ -3,9 +3,11 @@ #include #include #include +#include #include "helpers/helpers.h" #include "helpers/sysfs.h" +#include "cpufreq.h" #if defined(__i386__) || defined(__x86_64__) @@ -83,6 +85,22 @@ int cpupower_intel_set_perf_bias(unsigned int cpu, unsigned int val) return 0; } +bool cpupower_amd_pstate_enabled(void) +{ + char *driver = cpufreq_get_driver(0); + bool ret = false; + + if (!driver) + return ret; + + if (!strcmp(driver, "amd-pstate")) + ret = true; + + 
cpufreq_put_driver(driver); + + return ret; +} + #endif /* #if defined(__i386__) || defined(__x86_64__) */ /* get_cpustate -- GitLab From 083792f368b8ceea7ae035b6641e9cef3aceb366 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Tue, 22 Feb 2022 23:34:19 +0800 Subject: [PATCH 0590/1586] cpupower: Initial AMD P-State capability If kernel starts the AMD P-State module, the cpupower will initial the capability flag as CPUPOWER_CAP_AMD_PSTATE. And once AMD P-State capability is set, it won't need to set legacy ACPI relative capabilities anymore. Reviewed-by: Shuah Khan Signed-off-by: Huang Rui Signed-off-by: Shuah Khan --- tools/power/cpupower/utils/helpers/cpuid.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tools/power/cpupower/utils/helpers/cpuid.c b/tools/power/cpupower/utils/helpers/cpuid.c index 72eb435931803..eae91f11d1870 100644 --- a/tools/power/cpupower/utils/helpers/cpuid.c +++ b/tools/power/cpupower/utils/helpers/cpuid.c @@ -149,6 +149,19 @@ out: if (ext_cpuid_level >= 0x80000008 && cpuid_ebx(0x80000008) & (1 << 4)) cpu_info->caps |= CPUPOWER_CAP_AMD_RDPRU; + + if (cpupower_amd_pstate_enabled()) { + cpu_info->caps |= CPUPOWER_CAP_AMD_PSTATE; + + /* + * If AMD P-State is enabled, the firmware will treat + * AMD P-State function as high priority. + */ + cpu_info->caps &= ~CPUPOWER_CAP_AMD_CPB; + cpu_info->caps &= ~CPUPOWER_CAP_AMD_CPB_MSR; + cpu_info->caps &= ~CPUPOWER_CAP_AMD_HW_PSTATE; + cpu_info->caps &= ~CPUPOWER_CAP_AMD_PSTATEDEF; + } } if (cpu_info->vendor == X86_VENDOR_INTEL) { -- GitLab From e3ede97657d8cfc4fd75aecad50269534bb55aed Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Tue, 22 Feb 2022 23:34:20 +0800 Subject: [PATCH 0591/1586] cpupower: Add the function to get the sysfs value from specific table Expose the helper into cpufreq header, then cpufreq driver can use this function to get the sysfs value if it has any specific sysfs interfaces. 
Reviewed-by: Shuah Khan Signed-off-by: Huang Rui Signed-off-by: Shuah Khan --- tools/power/cpupower/lib/cpufreq.c | 23 ++++++++++++++++------- tools/power/cpupower/lib/cpufreq.h | 12 ++++++++++++ 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/tools/power/cpupower/lib/cpufreq.c b/tools/power/cpupower/lib/cpufreq.c index c3b56db8b9214..1516d23c17c98 100644 --- a/tools/power/cpupower/lib/cpufreq.c +++ b/tools/power/cpupower/lib/cpufreq.c @@ -83,20 +83,21 @@ static const char *cpufreq_value_files[MAX_CPUFREQ_VALUE_READ_FILES] = { [STATS_NUM_TRANSITIONS] = "stats/total_trans" }; - -static unsigned long sysfs_cpufreq_get_one_value(unsigned int cpu, - enum cpufreq_value which) +unsigned long cpufreq_get_sysfs_value_from_table(unsigned int cpu, + const char **table, + unsigned int index, + unsigned int size) { unsigned long value; unsigned int len; char linebuf[MAX_LINE_LEN]; char *endp; - if (which >= MAX_CPUFREQ_VALUE_READ_FILES) + if (!table || index >= size || !table[index]) return 0; - len = sysfs_cpufreq_read_file(cpu, cpufreq_value_files[which], - linebuf, sizeof(linebuf)); + len = sysfs_cpufreq_read_file(cpu, table[index], linebuf, + sizeof(linebuf)); if (len == 0) return 0; @@ -109,6 +110,14 @@ static unsigned long sysfs_cpufreq_get_one_value(unsigned int cpu, return value; } +static unsigned long sysfs_cpufreq_get_one_value(unsigned int cpu, + enum cpufreq_value which) +{ + return cpufreq_get_sysfs_value_from_table(cpu, cpufreq_value_files, + which, + MAX_CPUFREQ_VALUE_READ_FILES); +} + /* read access to files which contain one string */ enum cpufreq_string { @@ -124,7 +133,7 @@ static const char *cpufreq_string_files[MAX_CPUFREQ_STRING_FILES] = { static char *sysfs_cpufreq_get_one_string(unsigned int cpu, - enum cpufreq_string which) + enum cpufreq_string which) { char linebuf[MAX_LINE_LEN]; char *result; diff --git a/tools/power/cpupower/lib/cpufreq.h b/tools/power/cpupower/lib/cpufreq.h index 95f4fd9e2656c..2f3c840358063 100644 --- 
a/tools/power/cpupower/lib/cpufreq.h +++ b/tools/power/cpupower/lib/cpufreq.h @@ -203,6 +203,18 @@ int cpufreq_modify_policy_governor(unsigned int cpu, char *governor); int cpufreq_set_frequency(unsigned int cpu, unsigned long target_frequency); +/* + * get the sysfs value from specific table + * + * Read the value with the sysfs file name from specific table. Does + * only work if the cpufreq driver has the specific sysfs interfaces. + */ + +unsigned long cpufreq_get_sysfs_value_from_table(unsigned int cpu, + const char **table, + unsigned int index, + unsigned int size); + #ifdef __cplusplus } #endif -- GitLab From 4a06806e5d4a781d2c81f6064985018562b2604b Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Tue, 22 Feb 2022 23:34:21 +0800 Subject: [PATCH 0592/1586] cpupower: Introduce ACPI CPPC library Kernel ACPI subsystem introduced the sysfs attributes for acpi cppc library in below path: /sys/devices/system/cpu/cpuX/acpi_cppc/ And these attributes will be used for AMD P-State driver to provide some performance and frequency values.
Reviewed-by: Shuah Khan Signed-off-by: Huang Rui Signed-off-by: Shuah Khan --- tools/power/cpupower/Makefile | 6 +-- tools/power/cpupower/lib/acpi_cppc.c | 59 ++++++++++++++++++++++++++++ tools/power/cpupower/lib/acpi_cppc.h | 21 ++++++++++ 3 files changed, 83 insertions(+), 3 deletions(-) create mode 100644 tools/power/cpupower/lib/acpi_cppc.c create mode 100644 tools/power/cpupower/lib/acpi_cppc.h diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index 3b1594447f294..e9b6de314654f 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -143,9 +143,9 @@ UTIL_HEADERS = utils/helpers/helpers.h utils/idle_monitor/cpupower-monitor.h \ utils/helpers/bitmask.h \ utils/idle_monitor/idle_monitors.h utils/idle_monitor/idle_monitors.def -LIB_HEADERS = lib/cpufreq.h lib/cpupower.h lib/cpuidle.h -LIB_SRC = lib/cpufreq.c lib/cpupower.c lib/cpuidle.c -LIB_OBJS = lib/cpufreq.o lib/cpupower.o lib/cpuidle.o +LIB_HEADERS = lib/cpufreq.h lib/cpupower.h lib/cpuidle.h lib/acpi_cppc.h +LIB_SRC = lib/cpufreq.c lib/cpupower.c lib/cpuidle.c lib/acpi_cppc.c +LIB_OBJS = lib/cpufreq.o lib/cpupower.o lib/cpuidle.o lib/acpi_cppc.o LIB_OBJS := $(addprefix $(OUTPUT),$(LIB_OBJS)) override CFLAGS += -pipe diff --git a/tools/power/cpupower/lib/acpi_cppc.c b/tools/power/cpupower/lib/acpi_cppc.c new file mode 100644 index 0000000000000..c401ac331e9fa --- /dev/null +++ b/tools/power/cpupower/lib/acpi_cppc.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cpupower_intern.h" +#include "acpi_cppc.h" + +/* ACPI CPPC sysfs access ***********************************************/ + +static int acpi_cppc_read_file(unsigned int cpu, const char *fname, + char *buf, size_t buflen) +{ + char path[SYSFS_PATH_MAX]; + + snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/acpi_cppc/%s", + cpu, fname); + return cpupower_read_sysfs(path, buf, buflen); +} + +static 
const char * const acpi_cppc_value_files[] = { + [HIGHEST_PERF] = "highest_perf", + [LOWEST_PERF] = "lowest_perf", + [NOMINAL_PERF] = "nominal_perf", + [LOWEST_NONLINEAR_PERF] = "lowest_nonlinear_perf", + [LOWEST_FREQ] = "lowest_freq", + [NOMINAL_FREQ] = "nominal_freq", + [REFERENCE_PERF] = "reference_perf", + [WRAPAROUND_TIME] = "wraparound_time" +}; + +unsigned long acpi_cppc_get_data(unsigned int cpu, enum acpi_cppc_value which) +{ + unsigned long long value; + unsigned int len; + char linebuf[MAX_LINE_LEN]; + char *endp; + + if (which >= MAX_CPPC_VALUE_FILES) + return 0; + + len = acpi_cppc_read_file(cpu, acpi_cppc_value_files[which], + linebuf, sizeof(linebuf)); + if (len == 0) + return 0; + + value = strtoull(linebuf, &endp, 0); + + if (endp == linebuf || errno == ERANGE) + return 0; + + return value; +} diff --git a/tools/power/cpupower/lib/acpi_cppc.h b/tools/power/cpupower/lib/acpi_cppc.h new file mode 100644 index 0000000000000..85ca83080316a --- /dev/null +++ b/tools/power/cpupower/lib/acpi_cppc.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __ACPI_CPPC_H__ +#define __ACPI_CPPC_H__ + +enum acpi_cppc_value { + HIGHEST_PERF, + LOWEST_PERF, + NOMINAL_PERF, + LOWEST_NONLINEAR_PERF, + LOWEST_FREQ, + NOMINAL_FREQ, + REFERENCE_PERF, + WRAPAROUND_TIME, + MAX_CPPC_VALUE_FILES +}; + +unsigned long acpi_cppc_get_data(unsigned int cpu, + enum acpi_cppc_value which); + +#endif /* _ACPI_CPPC_H */ -- GitLab From 33e43f3636dffe84753847eee79ea0e3527105e6 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Tue, 22 Feb 2022 23:34:22 +0800 Subject: [PATCH 0593/1586] cpupower: Add AMD P-State sysfs definition and access helper Introduce the macro definitions and access helper function for AMD P-State sysfs interfaces such as each performance goals and frequency levels in amd helper file. They will be used to read the sysfs attribute from AMD P-State cpufreq driver for cpupower utilities.
Reviewed-by: Shuah Khan Signed-off-by: Huang Rui Signed-off-by: Shuah Khan --- tools/power/cpupower/utils/helpers/amd.c | 30 ++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tools/power/cpupower/utils/helpers/amd.c b/tools/power/cpupower/utils/helpers/amd.c index 97f2c857048e1..4d45d1b44164f 100644 --- a/tools/power/cpupower/utils/helpers/amd.c +++ b/tools/power/cpupower/utils/helpers/amd.c @@ -8,7 +8,10 @@ #include #include "helpers/helpers.h" +#include "cpufreq.h" +#include "acpi_cppc.h" +/* ACPI P-States Helper Functions for AMD Processors ***************/ #define MSR_AMD_PSTATE_STATUS 0xc0010063 #define MSR_AMD_PSTATE 0xc0010064 #define MSR_AMD_PSTATE_LIMIT 0xc0010061 @@ -146,4 +149,31 @@ int amd_pci_get_num_boost_states(int *active, int *states) pci_cleanup(pci_acc); return 0; } + +/* ACPI P-States Helper Functions for AMD Processors ***************/ + +/* AMD P-State Helper Functions ************************************/ +enum amd_pstate_value { + AMD_PSTATE_HIGHEST_PERF, + AMD_PSTATE_MAX_FREQ, + AMD_PSTATE_LOWEST_NONLINEAR_FREQ, + MAX_AMD_PSTATE_VALUE_READ_FILES, +}; + +static const char *amd_pstate_value_files[MAX_AMD_PSTATE_VALUE_READ_FILES] = { + [AMD_PSTATE_HIGHEST_PERF] = "amd_pstate_highest_perf", + [AMD_PSTATE_MAX_FREQ] = "amd_pstate_max_freq", + [AMD_PSTATE_LOWEST_NONLINEAR_FREQ] = "amd_pstate_lowest_nonlinear_freq", +}; + +static unsigned long amd_pstate_get_data(unsigned int cpu, + enum amd_pstate_value value) +{ + return cpufreq_get_sysfs_value_from_table(cpu, + amd_pstate_value_files, + value, + MAX_AMD_PSTATE_VALUE_READ_FILES); +} + +/* AMD P-State Helper Functions ************************************/ #endif /* defined(__i386__) || defined(__x86_64__) */ -- GitLab From bf9801baa81802dac7e2a5318944ca2f4bfa74ef Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Tue, 22 Feb 2022 23:34:23 +0800 Subject: [PATCH 0594/1586] cpupower: Enable boost state support for AMD P-State module The legacy ACPI hardware P-States function has 3 
P-States on ACPI table, the CPU frequency only can be switched between the 3 P-States. While the processor supports the boost state, it will have another boost state that the frequency can be higher than P0 state, and the state can be decoded by the function of decode_pstates() and read by amd_pci_get_num_boost_states(). However, the new AMD P-State function is different than legacy ACPI hardware P-State on AMD processors. That has a finer grain frequency range between the highest and lowest frequency. And boost frequency is actually the frequency which is mapped on highest performance ratio. The similar previous P0 frequency is mapped on nominal performance ratio. If the highest performance on the processor is higher than nominal performance, then we think the current processor supports the boost state. And it uses amd_pstate_boost_init() to initialize boost for AMD P-State function. Reviewed-by: Shuah Khan Signed-off-by: Huang Rui Signed-off-by: Shuah Khan --- tools/power/cpupower/utils/helpers/amd.c | 18 ++++++++++++++++++ tools/power/cpupower/utils/helpers/helpers.h | 5 +++++ tools/power/cpupower/utils/helpers/misc.c | 2 ++ 3 files changed, 25 insertions(+) diff --git a/tools/power/cpupower/utils/helpers/amd.c b/tools/power/cpupower/utils/helpers/amd.c index 4d45d1b44164f..f5ba528dc7db8 100644 --- a/tools/power/cpupower/utils/helpers/amd.c +++ b/tools/power/cpupower/utils/helpers/amd.c @@ -175,5 +175,23 @@ static unsigned long amd_pstate_get_data(unsigned int cpu, MAX_AMD_PSTATE_VALUE_READ_FILES); } +void amd_pstate_boost_init(unsigned int cpu, int *support, int *active) +{ + unsigned long highest_perf, nominal_perf, cpuinfo_min, + cpuinfo_max, amd_pstate_max; + + highest_perf = amd_pstate_get_data(cpu, AMD_PSTATE_HIGHEST_PERF); + nominal_perf = acpi_cppc_get_data(cpu, NOMINAL_PERF); + + *support = highest_perf > nominal_perf ? 
1 : 0; + if (!(*support)) + return; + + cpufreq_get_hardware_limits(cpu, &cpuinfo_min, &cpuinfo_max); + amd_pstate_max = amd_pstate_get_data(cpu, AMD_PSTATE_MAX_FREQ); + + *active = cpuinfo_max == amd_pstate_max ? 1 : 0; +} + /* AMD P-State Helper Functions ************************************/ #endif /* defined(__i386__) || defined(__x86_64__) */ diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h index 62771a0868712..326491e11c6ec 100644 --- a/tools/power/cpupower/utils/helpers/helpers.h +++ b/tools/power/cpupower/utils/helpers/helpers.h @@ -140,6 +140,8 @@ extern int cpufreq_has_boost_support(unsigned int cpu, int *support, /* AMD P-State stuff **************************/ bool cpupower_amd_pstate_enabled(void); +void amd_pstate_boost_init(unsigned int cpu, + int *support, int *active); /* AMD P-State stuff **************************/ @@ -177,6 +179,9 @@ static inline int cpufreq_has_boost_support(unsigned int cpu, int *support, static inline bool cpupower_amd_pstate_enabled(void) { return false; } +static inline void amd_pstate_boost_init(unsigned int cpu, int *support, + int *active) +{} /* cpuid and cpuinfo helpers **************************/ diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c index 0c483cdefcc26..e0d3145434d31 100644 --- a/tools/power/cpupower/utils/helpers/misc.c +++ b/tools/power/cpupower/utils/helpers/misc.c @@ -41,6 +41,8 @@ int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active, if (ret) return ret; } + } else if (cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_PSTATE) { + amd_pstate_boost_init(cpu, support, active); } else if (cpupower_cpu_info.caps & CPUPOWER_CAP_INTEL_IDA) *support = *active = 1; return 0; -- GitLab From 35fdf42d90d09d2d00ef65999fe338027a6b4d8e Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Tue, 22 Feb 2022 23:34:24 +0800 Subject: [PATCH 0595/1586] cpupower: Move print_speed function into misc helper The 
print_speed can be as a common function, and expose it into misc helper header. Then it can be used on other helper files as well. Reviewed-by: Shuah Khan Signed-off-by: Huang Rui Signed-off-by: Shuah Khan --- tools/power/cpupower/utils/cpufreq-info.c | 59 ++++---------------- tools/power/cpupower/utils/helpers/helpers.h | 1 + tools/power/cpupower/utils/helpers/misc.c | 40 +++++++++++++ 3 files changed, 52 insertions(+), 48 deletions(-) diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c index f9895e31ff5ae..b429454bf3aef 100644 --- a/tools/power/cpupower/utils/cpufreq-info.c +++ b/tools/power/cpupower/utils/cpufreq-info.c @@ -84,43 +84,6 @@ static void proc_cpufreq_output(void) } static int no_rounding; -static void print_speed(unsigned long speed) -{ - unsigned long tmp; - - if (no_rounding) { - if (speed > 1000000) - printf("%u.%06u GHz", ((unsigned int) speed/1000000), - ((unsigned int) speed%1000000)); - else if (speed > 1000) - printf("%u.%03u MHz", ((unsigned int) speed/1000), - (unsigned int) (speed%1000)); - else - printf("%lu kHz", speed); - } else { - if (speed > 1000000) { - tmp = speed%10000; - if (tmp >= 5000) - speed += 10000; - printf("%u.%02u GHz", ((unsigned int) speed/1000000), - ((unsigned int) (speed%1000000)/10000)); - } else if (speed > 100000) { - tmp = speed%1000; - if (tmp >= 500) - speed += 1000; - printf("%u MHz", ((unsigned int) speed/1000)); - } else if (speed > 1000) { - tmp = speed%100; - if (tmp >= 50) - speed += 100; - printf("%u.%01u MHz", ((unsigned int) speed/1000), - ((unsigned int) (speed%1000)/100)); - } - } - - return; -} - static void print_duration(unsigned long duration) { unsigned long tmp; @@ -254,11 +217,11 @@ static int get_boost_mode(unsigned int cpu) if (freqs) { printf(_(" boost frequency steps: ")); while (freqs->next) { - print_speed(freqs->frequency); + print_speed(freqs->frequency, no_rounding); printf(", "); freqs = freqs->next; } - print_speed(freqs->frequency); 
+ print_speed(freqs->frequency, no_rounding); printf("\n"); cpufreq_put_available_frequencies(freqs); } @@ -277,7 +240,7 @@ static int get_freq_kernel(unsigned int cpu, unsigned int human) return -EINVAL; } if (human) { - print_speed(freq); + print_speed(freq, no_rounding); } else printf("%lu", freq); printf(_(" (asserted by call to kernel)\n")); @@ -296,7 +259,7 @@ static int get_freq_hardware(unsigned int cpu, unsigned int human) return -EINVAL; } if (human) { - print_speed(freq); + print_speed(freq, no_rounding); } else printf("%lu", freq); printf(_(" (asserted by call to hardware)\n")); @@ -316,9 +279,9 @@ static int get_hardware_limits(unsigned int cpu, unsigned int human) if (human) { printf(_(" hardware limits: ")); - print_speed(min); + print_speed(min, no_rounding); printf(" - "); - print_speed(max); + print_speed(max, no_rounding); printf("\n"); } else { printf("%lu %lu\n", min, max); @@ -350,9 +313,9 @@ static int get_policy(unsigned int cpu) return -EINVAL; } printf(_(" current policy: frequency should be within ")); - print_speed(policy->min); + print_speed(policy->min, no_rounding); printf(_(" and ")); - print_speed(policy->max); + print_speed(policy->max, no_rounding); printf(".\n "); printf(_("The governor \"%s\" may decide which speed to use\n" @@ -436,7 +399,7 @@ static int get_freq_stats(unsigned int cpu, unsigned int human) struct cpufreq_stats *stats = cpufreq_get_stats(cpu, &total_time); while (stats) { if (human) { - print_speed(stats->frequency); + print_speed(stats->frequency, no_rounding); printf(":%.2f%%", (100.0 * stats->time_in_state) / total_time); } else @@ -486,11 +449,11 @@ static void debug_output_one(unsigned int cpu) if (freqs) { printf(_(" available frequency steps: ")); while (freqs->next) { - print_speed(freqs->frequency); + print_speed(freqs->frequency, no_rounding); printf(", "); freqs = freqs->next; } - print_speed(freqs->frequency); + print_speed(freqs->frequency, no_rounding); printf("\n"); 
cpufreq_put_available_frequencies(freqs); } diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h index 326491e11c6ec..fa2a8c1b1d269 100644 --- a/tools/power/cpupower/utils/helpers/helpers.h +++ b/tools/power/cpupower/utils/helpers/helpers.h @@ -200,5 +200,6 @@ extern struct bitmask *offline_cpus; void get_cpustate(void); void print_online_cpus(void); void print_offline_cpus(void); +void print_speed(unsigned long speed, int no_rounding); #endif /* __CPUPOWERUTILS_HELPERS__ */ diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c index e0d3145434d31..9547b29254a7f 100644 --- a/tools/power/cpupower/utils/helpers/misc.c +++ b/tools/power/cpupower/utils/helpers/misc.c @@ -164,3 +164,43 @@ void print_offline_cpus(void) printf(_("cpupower set operation was not performed on them\n")); } } + +/* + * print_speed + * + * Print the exact CPU frequency with appropriate unit + */ +void print_speed(unsigned long speed, int no_rounding) +{ + unsigned long tmp; + + if (no_rounding) { + if (speed > 1000000) + printf("%u.%06u GHz", ((unsigned int)speed / 1000000), + ((unsigned int)speed % 1000000)); + else if (speed > 1000) + printf("%u.%03u MHz", ((unsigned int)speed / 1000), + (unsigned int)(speed % 1000)); + else + printf("%lu kHz", speed); + } else { + if (speed > 1000000) { + tmp = speed % 10000; + if (tmp >= 5000) + speed += 10000; + printf("%u.%02u GHz", ((unsigned int)speed / 1000000), + ((unsigned int)(speed % 1000000) / 10000)); + } else if (speed > 100000) { + tmp = speed % 1000; + if (tmp >= 500) + speed += 1000; + printf("%u MHz", ((unsigned int)speed / 1000)); + } else if (speed > 1000) { + tmp = speed % 100; + if (tmp >= 50) + speed += 100; + printf("%u.%01u MHz", ((unsigned int)speed / 1000), + ((unsigned int)(speed % 1000) / 100)); + } + } +} -- GitLab From d8363e29178249bb505ae388ce1658484396fcde Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Tue, 22 Feb 2022 23:34:25 
+0800 Subject: [PATCH 0596/1586] cpupower: Add function to print AMD P-State performance capabilities AMD P-State kernel module is using the fine grain frequency instead of acpi hardware pstate. So add a function to print performance and frequency values. Reviewed-by: Shuah Khan Signed-off-by: Huang Rui Signed-off-by: Shuah Khan --- tools/power/cpupower/utils/cpufreq-info.c | 9 ++++-- tools/power/cpupower/utils/helpers/amd.c | 29 ++++++++++++++++++++ tools/power/cpupower/utils/helpers/helpers.h | 5 ++++ 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c index b429454bf3aef..235243ec5ce0d 100644 --- a/tools/power/cpupower/utils/cpufreq-info.c +++ b/tools/power/cpupower/utils/cpufreq-info.c @@ -146,9 +146,12 @@ static int get_boost_mode_x86(unsigned int cpu) printf(_(" Supported: %s\n"), support ? _("yes") : _("no")); printf(_(" Active: %s\n"), active ? _("yes") : _("no")); - if ((cpupower_cpu_info.vendor == X86_VENDOR_AMD && - cpupower_cpu_info.family >= 0x10) || - cpupower_cpu_info.vendor == X86_VENDOR_HYGON) { + if (cpupower_cpu_info.vendor == X86_VENDOR_AMD && + cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_PSTATE) { + return 0; + } else if ((cpupower_cpu_info.vendor == X86_VENDOR_AMD && + cpupower_cpu_info.family >= 0x10) || + cpupower_cpu_info.vendor == X86_VENDOR_HYGON) { ret = decode_pstates(cpu, b_states, pstates, &pstate_no); if (ret) return ret; diff --git a/tools/power/cpupower/utils/helpers/amd.c b/tools/power/cpupower/utils/helpers/amd.c index f5ba528dc7db8..c519cc89c97f4 100644 --- a/tools/power/cpupower/utils/helpers/amd.c +++ b/tools/power/cpupower/utils/helpers/amd.c @@ -193,5 +193,34 @@ void amd_pstate_boost_init(unsigned int cpu, int *support, int *active) *active = cpuinfo_max == amd_pstate_max ? 1 : 0; } +void amd_pstate_show_perf_and_freq(unsigned int cpu, int no_rounding) +{ + printf(_(" AMD PSTATE Highest Performance: %lu. 
Maximum Frequency: "), + amd_pstate_get_data(cpu, AMD_PSTATE_HIGHEST_PERF)); + /* + * If boost isn't active, the cpuinfo_max doesn't indicate real max + * frequency. So we read it back from amd-pstate sysfs entry. + */ + print_speed(amd_pstate_get_data(cpu, AMD_PSTATE_MAX_FREQ), no_rounding); + printf(".\n"); + + printf(_(" AMD PSTATE Nominal Performance: %lu. Nominal Frequency: "), + acpi_cppc_get_data(cpu, NOMINAL_PERF)); + print_speed(acpi_cppc_get_data(cpu, NOMINAL_FREQ) * 1000, + no_rounding); + printf(".\n"); + + printf(_(" AMD PSTATE Lowest Non-linear Performance: %lu. Lowest Non-linear Frequency: "), + acpi_cppc_get_data(cpu, LOWEST_NONLINEAR_PERF)); + print_speed(amd_pstate_get_data(cpu, AMD_PSTATE_LOWEST_NONLINEAR_FREQ), + no_rounding); + printf(".\n"); + + printf(_(" AMD PSTATE Lowest Performance: %lu. Lowest Frequency: "), + acpi_cppc_get_data(cpu, LOWEST_PERF)); + print_speed(acpi_cppc_get_data(cpu, LOWEST_FREQ) * 1000, no_rounding); + printf(".\n"); +} + /* AMD P-State Helper Functions ************************************/ #endif /* defined(__i386__) || defined(__x86_64__) */ diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h index fa2a8c1b1d269..96e4bede078b0 100644 --- a/tools/power/cpupower/utils/helpers/helpers.h +++ b/tools/power/cpupower/utils/helpers/helpers.h @@ -142,6 +142,8 @@ extern int cpufreq_has_boost_support(unsigned int cpu, int *support, bool cpupower_amd_pstate_enabled(void); void amd_pstate_boost_init(unsigned int cpu, int *support, int *active); +void amd_pstate_show_perf_and_freq(unsigned int cpu, + int no_rounding); /* AMD P-State stuff **************************/ @@ -182,6 +184,9 @@ static inline bool cpupower_amd_pstate_enabled(void) static inline void amd_pstate_boost_init(unsigned int cpu, int *support, int *active) {} +static inline void amd_pstate_show_perf_and_freq(unsigned int cpu, + int no_rounding) +{} /* cpuid and cpuinfo helpers **************************/ -- 
GitLab From 4d986ffa036a773456476f70bd0fde2fb1330b7d Mon Sep 17 00:00:00 2001 From: Wang Qing Date: Mon, 14 Feb 2022 18:00:07 -0800 Subject: [PATCH 0597/1586] spi: add missing pci_dev_put() before return pci_get_slot() increases its reference count, the caller must decrement the reference count by calling pci_dev_put() Signed-off-by: Wang Qing Link: https://lore.kernel.org/r/1644890407-65167-1-git-send-email-wangqing@vivo.com Signed-off-by: Mark Brown --- drivers/spi/spi-topcliff-pch.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-topcliff-pch.c b/drivers/spi/spi-topcliff-pch.c index 8c4615b763398..8e1cc345810a6 100644 --- a/drivers/spi/spi-topcliff-pch.c +++ b/drivers/spi/spi-topcliff-pch.c @@ -877,7 +877,7 @@ static void pch_spi_request_dma(struct pch_spi_data *data, int bpw) dev_err(&data->master->dev, "ERROR: dma_request_channel FAILS(Tx)\n"); data->use_dma = 0; - return; + goto out; } dma->chan_tx = chan; @@ -894,9 +894,12 @@ static void pch_spi_request_dma(struct pch_spi_data *data, int bpw) dma_release_channel(dma->chan_tx); dma->chan_tx = NULL; data->use_dma = 0; - return; + goto out; } dma->chan_rx = chan; + +out: + pci_dev_put(dma_dev); } static void pch_spi_release_dma(struct pch_spi_data *data) -- GitLab From 4363f3d3ce8f5440dfbcd66b6a6800b42a58ba6a Mon Sep 17 00:00:00 2001 From: Harman Kalra Date: Fri, 4 Feb 2022 18:16:01 +0530 Subject: [PATCH 0598/1586] crypto: octeontx2 - add synchronization between mailbox accesses Since there are two workqueues implemented in CPTPF driver - one for handling mailbox requests from VFs and another for handling FLR. In both cases PF driver will forward the request to AF driver by writing to mailbox memory. A race condition may arise if two simultaneous requests are written to mailbox memory. Introducing locking mechanism to maintain synchronization between multiple mailbox accesses. 
Signed-off-by: Harman Kalra Signed-off-by: Herbert Xu --- .../marvell/octeontx2/otx2_cpt_common.h | 1 + .../marvell/octeontx2/otx2_cpt_mbox_common.c | 14 +++++++++++ drivers/crypto/marvell/octeontx2/otx2_cptpf.h | 1 + .../marvell/octeontx2/otx2_cptpf_main.c | 21 ++++++++++------- .../marvell/octeontx2/otx2_cptpf_mbox.c | 23 ++++++++++++++----- 5 files changed, 46 insertions(+), 14 deletions(-) diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h index fb56824cb0a6f..5012b7e669f07 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h @@ -157,5 +157,6 @@ struct otx2_cptlfs_info; int otx2_cpt_attach_rscrs_msg(struct otx2_cptlfs_info *lfs); int otx2_cpt_detach_rsrcs_msg(struct otx2_cptlfs_info *lfs); int otx2_cpt_msix_offset_msg(struct otx2_cptlfs_info *lfs); +int otx2_cpt_sync_mbox_msg(struct otx2_mbox *mbox); #endif /* __OTX2_CPT_COMMON_H */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c b/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c index 9074876d38e5d..a317319696eff 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c @@ -202,3 +202,17 @@ int otx2_cpt_msix_offset_msg(struct otx2_cptlfs_info *lfs) } return ret; } + +int otx2_cpt_sync_mbox_msg(struct otx2_mbox *mbox) +{ + int err; + + if (!otx2_mbox_nonempty(mbox, 0)) + return 0; + otx2_mbox_msg_send(mbox, 0); + err = otx2_mbox_wait_for_rsp(mbox, 0); + if (err) + return err; + + return otx2_mbox_check_rsp_msgs(mbox, 0); +} diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf.h b/drivers/crypto/marvell/octeontx2/otx2_cptpf.h index 05b2d9c650e10..936174b012e8e 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf.h @@ -46,6 +46,7 @@ struct otx2_cptpf_dev { struct workqueue_struct *flr_wq; struct cptpf_flr_work *flr_work; + 
struct mutex lock; /* serialize mailbox access */ unsigned long cap_flag; u8 pf_id; /* RVU PF number */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c index 1720a5bb70161..17a9dd20c8c35 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c @@ -140,6 +140,7 @@ static void cptpf_flr_wq_handler(struct work_struct *work) vf = flr_work - pf->flr_work; + mutex_lock(&pf->lock); req = otx2_mbox_alloc_msg_rsp(mbox, 0, sizeof(*req), sizeof(struct msg_rsp)); if (!req) @@ -151,16 +152,19 @@ static void cptpf_flr_wq_handler(struct work_struct *work) req->pcifunc |= (vf + 1) & RVU_PFVF_FUNC_MASK; otx2_cpt_send_mbox_msg(mbox, pf->pdev); + if (!otx2_cpt_sync_mbox_msg(&pf->afpf_mbox)) { - if (vf >= 64) { - reg = 1; - vf = vf - 64; + if (vf >= 64) { + reg = 1; + vf = vf - 64; + } + /* Clear transaction pending register */ + otx2_cpt_write64(pf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFTRPENDX(reg), BIT_ULL(vf)); + otx2_cpt_write64(pf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFFLR_INT_ENA_W1SX(reg), BIT_ULL(vf)); } - /* Clear transaction pending register */ - otx2_cpt_write64(pf->reg_base, BLKADDR_RVUM, 0, - RVU_PF_VFTRPENDX(reg), BIT_ULL(vf)); - otx2_cpt_write64(pf->reg_base, BLKADDR_RVUM, 0, - RVU_PF_VFFLR_INT_ENA_W1SX(reg), BIT_ULL(vf)); + mutex_unlock(&pf->lock); } static irqreturn_t cptpf_vf_flr_intr(int __always_unused irq, void *arg) @@ -468,6 +472,7 @@ static int cptpf_afpf_mbox_init(struct otx2_cptpf_dev *cptpf) goto error; INIT_WORK(&cptpf->afpf_mbox_work, otx2_cptpf_afpf_mbox_handler); + mutex_init(&cptpf->lock); return 0; error: diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c index 186f1c1190c1a..fee758b86d29f 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c @@ -18,6 +18,7 @@ static int 
forward_to_af(struct otx2_cptpf_dev *cptpf, struct mbox_msghdr *msg; int ret; + mutex_lock(&cptpf->lock); msg = otx2_mbox_alloc_msg(&cptpf->afpf_mbox, 0, size); if (msg == NULL) return -ENOMEM; @@ -29,15 +30,19 @@ static int forward_to_af(struct otx2_cptpf_dev *cptpf, msg->sig = req->sig; msg->ver = req->ver; - otx2_mbox_msg_send(&cptpf->afpf_mbox, 0); - ret = otx2_mbox_wait_for_rsp(&cptpf->afpf_mbox, 0); + ret = otx2_cpt_sync_mbox_msg(&cptpf->afpf_mbox); + /* Error code -EIO indicate there is a communication failure + * to the AF. Rest of the error codes indicate that AF processed + * VF messages and set the error codes in response messages + * (if any) so simply forward responses to VF. + */ if (ret == -EIO) { - dev_err(&cptpf->pdev->dev, "RVU MBOX timeout.\n"); + dev_warn(&cptpf->pdev->dev, + "AF not responding to VF%d messages\n", vf->vf_id); + mutex_unlock(&cptpf->lock); return ret; - } else if (ret) { - dev_err(&cptpf->pdev->dev, "RVU MBOX error: %d.\n", ret); - return -EFAULT; } + mutex_unlock(&cptpf->lock); return 0; } @@ -204,6 +209,10 @@ void otx2_cptpf_vfpf_mbox_handler(struct work_struct *work) if (err == -ENOMEM || err == -EIO) break; offset = msg->next_msgoff; + /* Write barrier required for VF responses which are handled by + * PF driver and not forwarded to AF. 
+ */ + smp_wmb(); } /* Send mbox responses to VF */ if (mdev->num_msgs) @@ -350,6 +359,8 @@ void otx2_cptpf_afpf_mbox_handler(struct work_struct *work) process_afpf_mbox_msg(cptpf, msg); offset = msg->next_msgoff; + /* Sync VF response ready to be sent */ + smp_wmb(); mdev->msgs_acked++; } otx2_mbox_reset(afpf_mbox, 0); -- GitLab From f17f3f82420f0b6914638f298064816c1efcbec3 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 16 Feb 2022 19:44:06 +0800 Subject: [PATCH 0599/1586] crypto: x86/blowfish - Remove unused inline functions This is unused after commit c0a64926c53e ("crypto: x86/blowfish - drop CTR mode implementation") Signed-off-by: YueHaibing Signed-off-by: Herbert Xu --- arch/x86/crypto/blowfish_glue.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c index a880e0b1c2555..fda6066437aa3 100644 --- a/arch/x86/crypto/blowfish_glue.c +++ b/arch/x86/crypto/blowfish_glue.c @@ -32,24 +32,12 @@ static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src) __blowfish_enc_blk(ctx, dst, src, false); } -static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst, - const u8 *src) -{ - __blowfish_enc_blk(ctx, dst, src, true); -} - static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, const u8 *src) { __blowfish_enc_blk_4way(ctx, dst, src, false); } -static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst, - const u8 *src) -{ - __blowfish_enc_blk_4way(ctx, dst, src, true); -} - static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { blowfish_enc_blk(crypto_tfm_ctx(tfm), dst, src); -- GitLab From c143a603c9abf3297b87ce1d2827883c67efc385 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 16 Feb 2022 19:45:21 +0800 Subject: [PATCH 0600/1586] crypto: x86/des3 - Remove unused inline function des3_ede_enc_blk_3way() This is unused after commit 768db5fee3bb ("crypto: x86/des - drop CTR mode implementation") 
Signed-off-by: YueHaibing Signed-off-by: Herbert Xu --- arch/x86/crypto/des3_ede_glue.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c index 787c234d2469c..abb8b1fe123b4 100644 --- a/arch/x86/crypto/des3_ede_glue.c +++ b/arch/x86/crypto/des3_ede_glue.c @@ -45,14 +45,6 @@ static inline void des3_ede_dec_blk(struct des3_ede_x86_ctx *ctx, u8 *dst, des3_ede_x86_64_crypt_blk(dec_ctx, dst, src); } -static inline void des3_ede_enc_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst, - const u8 *src) -{ - u32 *enc_ctx = ctx->enc.expkey; - - des3_ede_x86_64_crypt_blk_3way(enc_ctx, dst, src); -} - static inline void des3_ede_dec_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst, const u8 *src) { -- GitLab From 1fb37b5692c915edcc2448a6b37255738c7c77e0 Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Thu, 17 Feb 2022 21:27:26 +0200 Subject: [PATCH 0601/1586] crypto: ccree - don't attempt 0 len DMA mappings Refuse to try mapping zero bytes as this may cause a fault on some configurations / platforms and it seems the prev. attempt is not enough and we need to be more explicit. 
Signed-off-by: Gilad Ben-Yossef Reported-by: Corentin Labbe Fixes: ce0fc6db38de ("crypto: ccree - protect against empty or NULL scatterlists") Tested-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_buffer_mgr.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c index a5e041d9d2cf1..11e0278c8631d 100644 --- a/drivers/crypto/ccree/cc_buffer_mgr.c +++ b/drivers/crypto/ccree/cc_buffer_mgr.c @@ -258,6 +258,13 @@ static int cc_map_sg(struct device *dev, struct scatterlist *sg, { int ret = 0; + if (!nbytes) { + *mapped_nents = 0; + *lbytes = 0; + *nents = 0; + return 0; + } + *nents = cc_get_sgl_nents(dev, sg, nbytes, lbytes); if (*nents > max_sg_nents) { *nents = 0; -- GitLab From 0a2a464f863187f97e96ebc6384c052cafd4a54c Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Sat, 19 Feb 2022 16:08:08 +0800 Subject: [PATCH 0602/1586] crypto: hisilicon/sec - fix the aead software fallback for engine Because the subreq pointer misuses the private context memory, the aead soft crypto occasionally causes an OS panic when the 64K page size is set. This patch fixes it.
Fixes: 6c46a3297bea ("crypto: hisilicon/sec - add fallback tfm...") Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec_crypto.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index 8caba9fd1f19c..a91635c348b5e 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -2295,9 +2295,10 @@ static int sec_aead_soft_crypto(struct sec_ctx *ctx, struct aead_request *aead_req, bool encrypt) { - struct aead_request *subreq = aead_request_ctx(aead_req); struct sec_auth_ctx *a_ctx = &ctx->a_ctx; struct device *dev = ctx->dev; + struct aead_request *subreq; + int ret; /* Kunpeng920 aead mode not support input 0 size */ if (!a_ctx->fallback_aead_tfm) { @@ -2305,6 +2306,10 @@ static int sec_aead_soft_crypto(struct sec_ctx *ctx, return -EINVAL; } + subreq = aead_request_alloc(a_ctx->fallback_aead_tfm, GFP_KERNEL); + if (!subreq) + return -ENOMEM; + aead_request_set_tfm(subreq, a_ctx->fallback_aead_tfm); aead_request_set_callback(subreq, aead_req->base.flags, aead_req->base.complete, aead_req->base.data); @@ -2312,8 +2317,13 @@ static int sec_aead_soft_crypto(struct sec_ctx *ctx, aead_req->cryptlen, aead_req->iv); aead_request_set_ad(subreq, aead_req->assoclen); - return encrypt ? 
crypto_aead_encrypt(subreq) : - crypto_aead_decrypt(subreq); + if (encrypt) + ret = crypto_aead_encrypt(subreq); + else + ret = crypto_aead_decrypt(subreq); + aead_request_free(subreq); + + return ret; } static int sec_aead_crypto(struct aead_request *a_req, bool encrypt) -- GitLab From 4509d950a6764d18a99776614513d280cca422d8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Feb 2022 08:28:52 +0100 Subject: [PATCH 0603/1586] x86/pat: Remove the unused set_pages_array_wt() function Commit 623dffb2a2e0 ("x86/mm/pat: Add set_memory_wt() for Write-Through type") added it but there were no users. [ bp: Add a commit message. ] Signed-off-by: Christoph Hellwig Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220223072852.616143-1-hch@lst.de --- arch/x86/include/asm/set_memory.h | 1 - arch/x86/mm/pat/set_memory.c | 6 ------ 2 files changed, 7 deletions(-) diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index ff0f2d90338a1..60bdede41466d 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -53,7 +53,6 @@ int set_memory_global(unsigned long addr, int numpages); int set_pages_array_uc(struct page **pages, int addrinarray); int set_pages_array_wc(struct page **pages, int addrinarray); -int set_pages_array_wt(struct page **pages, int addrinarray); int set_pages_array_wb(struct page **pages, int addrinarray); /* diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index b4072115c8ef6..9bdaf828ee68f 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -2121,12 +2121,6 @@ int set_pages_array_wc(struct page **pages, int numpages) } EXPORT_SYMBOL(set_pages_array_wc); -int set_pages_array_wt(struct page **pages, int numpages) -{ - return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_WT); -} -EXPORT_SYMBOL_GPL(set_pages_array_wt); - int set_pages_wb(struct page *page, int numpages) { unsigned long addr = (unsigned 
long)page_address(page); -- GitLab From 6198311093dabcafbe345d580c56b5d5a9ab5f3c Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 22 Feb 2022 21:57:38 +0300 Subject: [PATCH 0604/1586] x86/cc: Move arch/x86/{kernel/cc_platform.c => coco/core.c} Move cc_platform.c to arch/x86/coco/. The directory is going to be the home space for code related to confidential computing. Intel TDX code will land here. AMD SEV code will also eventually be moved there. No functional changes. Signed-off-by: Kirill A. Shutemov Signed-off-by: Borislav Petkov Reviewed-by: Tom Lendacky Link: https://lore.kernel.org/r/20220222185740.26228-3-kirill.shutemov@linux.intel.com --- arch/x86/Kbuild | 2 ++ arch/x86/coco/Makefile | 6 ++++++ arch/x86/{kernel/cc_platform.c => coco/core.c} | 0 arch/x86/kernel/Makefile | 5 ----- 4 files changed, 8 insertions(+), 5 deletions(-) create mode 100644 arch/x86/coco/Makefile rename arch/x86/{kernel/cc_platform.c => coco/core.c} (100%) diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild index f384cb1a4f7a8..5a83da703e876 100644 --- a/arch/x86/Kbuild +++ b/arch/x86/Kbuild @@ -1,4 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_ARCH_HAS_CC_PLATFORM) += coco/ + obj-y += entry/ obj-$(CONFIG_PERF_EVENTS) += events/ diff --git a/arch/x86/coco/Makefile b/arch/x86/coco/Makefile new file mode 100644 index 0000000000000..c1ead00017a7f --- /dev/null +++ b/arch/x86/coco/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS_REMOVE_core.o = -pg +KASAN_SANITIZE_core.o := n +CFLAGS_core.o += -fno-stack-protector + +obj-y += core.o diff --git a/arch/x86/kernel/cc_platform.c b/arch/x86/coco/core.c similarity index 100% rename from arch/x86/kernel/cc_platform.c rename to arch/x86/coco/core.c diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 6aef9ee28a394..6462e3dd98f49 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -21,7 +21,6 @@ CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_early_printk.o = -pg 
CFLAGS_REMOVE_head64.o = -pg CFLAGS_REMOVE_sev.o = -pg -CFLAGS_REMOVE_cc_platform.o = -pg endif KASAN_SANITIZE_head$(BITS).o := n @@ -30,7 +29,6 @@ KASAN_SANITIZE_dumpstack_$(BITS).o := n KASAN_SANITIZE_stacktrace.o := n KASAN_SANITIZE_paravirt.o := n KASAN_SANITIZE_sev.o := n -KASAN_SANITIZE_cc_platform.o := n # With some compiler versions the generated code results in boot hangs, caused # by several compilation units. To be safe, disable all instrumentation. @@ -49,7 +47,6 @@ endif KCOV_INSTRUMENT := n CFLAGS_head$(BITS).o += -fno-stack-protector -CFLAGS_cc_platform.o += -fno-stack-protector CFLAGS_irq.o := -I $(srctree)/$(src)/../include/asm/trace @@ -151,8 +148,6 @@ obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev.o -obj-$(CONFIG_ARCH_HAS_CC_PLATFORM) += cc_platform.o - ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) -- GitLab From 655a0fa34b4f7ac6e2b1406fab15e52a7b6accb1 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 22 Feb 2022 21:57:39 +0300 Subject: [PATCH 0605/1586] x86/coco: Explicitly declare type of confidential computing platform The kernel derives the confidential computing platform type it is running as from sme_me_mask on AMD or by using hv_is_isolation_supported() on HyperV isolation VMs. This detection process will be more complicated as more platforms get added. Declare a confidential computing vendor variable explicitly and set it via cc_set_vendor() on the respective platform. [ bp: Massage commit message, fixup HyperV check. ] Signed-off-by: Kirill A. 
Shutemov Signed-off-by: Borislav Petkov Reviewed-by: Tom Lendacky Link: https://lore.kernel.org/r/20220222185740.26228-4-kirill.shutemov@linux.intel.com --- arch/x86/coco/core.c | 29 +++++++++++++++++------------ arch/x86/include/asm/coco.h | 14 ++++++++++++++ arch/x86/kernel/cpu/mshyperv.c | 6 ++++++ arch/x86/mm/mem_encrypt_identity.c | 11 +++++++---- 4 files changed, 44 insertions(+), 16 deletions(-) create mode 100644 arch/x86/include/asm/coco.h diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c index 6a6ffcd978f6e..476dcd198af5f 100644 --- a/arch/x86/coco/core.c +++ b/arch/x86/coco/core.c @@ -9,18 +9,15 @@ #include #include -#include -#include +#include #include -static bool __maybe_unused intel_cc_platform_has(enum cc_attr attr) +static enum cc_vendor vendor __ro_after_init; + +static bool intel_cc_platform_has(enum cc_attr attr) { -#ifdef CONFIG_INTEL_TDX_GUEST - return false; -#else return false; -#endif } /* @@ -74,12 +71,20 @@ static bool hyperv_cc_platform_has(enum cc_attr attr) bool cc_platform_has(enum cc_attr attr) { - if (sme_me_mask) + switch (vendor) { + case CC_VENDOR_AMD: return amd_cc_platform_has(attr); - - if (hv_is_isolation_supported()) + case CC_VENDOR_INTEL: + return intel_cc_platform_has(attr); + case CC_VENDOR_HYPERV: return hyperv_cc_platform_has(attr); - - return false; + default: + return false; + } } EXPORT_SYMBOL_GPL(cc_platform_has); + +__init void cc_set_vendor(enum cc_vendor v) +{ + vendor = v; +} diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h new file mode 100644 index 0000000000000..e49f9ddb6ae62 --- /dev/null +++ b/arch/x86/include/asm/coco.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_COCO_H +#define _ASM_X86_COCO_H + +enum cc_vendor { + CC_VENDOR_NONE, + CC_VENDOR_AMD, + CC_VENDOR_HYPERV, + CC_VENDOR_INTEL, +}; + +void cc_set_vendor(enum cc_vendor v); + +#endif /* _ASM_X86_COCO_H */ diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 
5a99f993e6392..e0a5724720523 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -33,6 +33,7 @@ #include #include #include +#include /* Is Linux running as the root partition? */ bool hv_root_partition; @@ -344,6 +345,11 @@ static void __init ms_hyperv_init_platform(void) */ swiotlb_force = SWIOTLB_FORCE; #endif + /* Isolation VMs are unenlightened SEV-based VMs, thus this check: */ + if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) { + if (hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE) + cc_set_vendor(CC_VENDOR_HYPERV); + } } if (hv_max_functions_eax >= HYPERV_CPUID_NESTED_FEATURES) { diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index 3f0abb4033403..06314ae3998e5 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -44,6 +44,7 @@ #include #include #include +#include #include "mm_internal.h" @@ -565,8 +566,7 @@ void __init sme_enable(struct boot_params *bp) } else { /* SEV state cannot be controlled by a command line option */ sme_me_mask = me_mask; - physical_mask &= ~sme_me_mask; - return; + goto out; } /* @@ -600,6 +600,9 @@ void __init sme_enable(struct boot_params *bp) sme_me_mask = 0; else sme_me_mask = active_by_default ? me_mask : 0; - - physical_mask &= ~sme_me_mask; +out: + if (sme_me_mask) { + physical_mask &= ~sme_me_mask; + cc_set_vendor(CC_VENDOR_AMD); + } } -- GitLab From b577f542f93cbba57f8d6185ef1fb13a41ddf162 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 22 Feb 2022 21:57:40 +0300 Subject: [PATCH 0606/1586] x86/coco: Add API to handle encryption mask AMD SME/SEV uses a bit in the page table entries to indicate that the page is encrypted and not accessible to the VMM. TDX uses a similar approach, but the polarity of the mask is opposite to AMD: if the bit is set the page is accessible to VMM. Provide vendor-neutral API to deal with the mask: cc_mkenc() and cc_mkdec() modify given address to make it encrypted/decrypted. 
It can be applied to phys_addr_t, pgprotval_t or page table entry value. pgprot_encrypted() and pgprot_decrypted() reimplemented using new helpers. The implementation will be extended to cover TDX. pgprot_decrypted() is used by drivers (i915, virtio_gpu, vfio). cc_mkdec() called by pgprot_decrypted(). Export cc_mkdec(). Signed-off-by: Kirill A. Shutemov Signed-off-by: Borislav Petkov Reviewed-by: Tom Lendacky Link: https://lore.kernel.org/r/20220222185740.26228-5-kirill.shutemov@linux.intel.com --- arch/x86/coco/core.c | 27 +++++++++++++++++++++++++++ arch/x86/include/asm/coco.h | 18 ++++++++++++++++++ arch/x86/include/asm/pgtable.h | 13 +++++++------ arch/x86/mm/mem_encrypt_identity.c | 1 + arch/x86/mm/pat/set_memory.c | 5 +++-- 5 files changed, 56 insertions(+), 8 deletions(-) diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c index 476dcd198af5f..fc1365dd927e8 100644 --- a/arch/x86/coco/core.c +++ b/arch/x86/coco/core.c @@ -14,6 +14,7 @@ #include static enum cc_vendor vendor __ro_after_init; +static u64 cc_mask __ro_after_init; static bool intel_cc_platform_has(enum cc_attr attr) { @@ -84,7 +85,33 @@ bool cc_platform_has(enum cc_attr attr) } EXPORT_SYMBOL_GPL(cc_platform_has); +u64 cc_mkenc(u64 val) +{ + switch (vendor) { + case CC_VENDOR_AMD: + return val | cc_mask; + default: + return val; + } +} + +u64 cc_mkdec(u64 val) +{ + switch (vendor) { + case CC_VENDOR_AMD: + return val & ~cc_mask; + default: + return val; + } +} +EXPORT_SYMBOL_GPL(cc_mkdec); + __init void cc_set_vendor(enum cc_vendor v) { vendor = v; } + +__init void cc_set_mask(u64 mask) +{ + cc_mask = mask; +} diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h index e49f9ddb6ae62..3d98c3a60d34f 100644 --- a/arch/x86/include/asm/coco.h +++ b/arch/x86/include/asm/coco.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_COCO_H #define _ASM_X86_COCO_H +#include + enum cc_vendor { CC_VENDOR_NONE, CC_VENDOR_AMD, @@ -10,5 +12,21 @@ enum cc_vendor { }; void cc_set_vendor(enum cc_vendor v); +void 
cc_set_mask(u64 mask); + +#ifdef CONFIG_ARCH_HAS_CC_PLATFORM +u64 cc_mkenc(u64 val); +u64 cc_mkdec(u64 val); +#else +static inline u64 cc_mkenc(u64 val) +{ + return val; +} + +static inline u64 cc_mkdec(u64 val) +{ + return val; +} +#endif #endif /* _ASM_X86_COCO_H */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 8a9432fb3802b..62ab07e24aef9 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -15,17 +15,12 @@ cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS))) \ : (prot)) -/* - * Macros to add or remove encryption attribute - */ -#define pgprot_encrypted(prot) __pgprot(__sme_set(pgprot_val(prot))) -#define pgprot_decrypted(prot) __pgprot(__sme_clr(pgprot_val(prot))) - #ifndef __ASSEMBLY__ #include #include #include #include +#include #include #include @@ -38,6 +33,12 @@ void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm, void ptdump_walk_pgd_level_checkwx(void); void ptdump_walk_user_pgd_level_checkwx(void); +/* + * Macros to add or remove encryption attribute + */ +#define pgprot_encrypted(prot) __pgprot(cc_mkenc(pgprot_val(prot))) +#define pgprot_decrypted(prot) __pgprot(cc_mkdec(pgprot_val(prot))) + #ifdef CONFIG_DEBUG_WX #define debug_checkwx() ptdump_walk_pgd_level_checkwx() #define debug_checkwx_user() ptdump_walk_user_pgd_level_checkwx() diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index 06314ae3998e5..b43bc24d2bb64 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -604,5 +604,6 @@ out: if (sme_me_mask) { physical_mask &= ~sme_me_mask; cc_set_vendor(CC_VENDOR_AMD); + cc_set_mask(sme_me_mask); } } diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index b4072115c8ef6..1441db69cea5d 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -1989,6 +1989,7 @@ int set_memory_global(unsigned long addr, int numpages) */ static int 
__set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc) { + pgprot_t empty = __pgprot(0); struct cpa_data cpa; int ret; @@ -1999,8 +2000,8 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc) memset(&cpa, 0, sizeof(cpa)); cpa.vaddr = &addr; cpa.numpages = numpages; - cpa.mask_set = enc ? __pgprot(_PAGE_ENC) : __pgprot(0); - cpa.mask_clr = enc ? __pgprot(0) : __pgprot(_PAGE_ENC); + cpa.mask_set = enc ? pgprot_encrypted(empty) : pgprot_decrypted(empty); + cpa.mask_clr = enc ? pgprot_decrypted(empty) : pgprot_encrypted(empty); cpa.pgd = init_mm.pgd; /* Must avoid aliasing mappings in the highmem code */ -- GitLab From 1e8c5971c249893ac33ca983c32bafcf5d50c727 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Tue, 22 Feb 2022 22:35:28 -0600 Subject: [PATCH 0607/1586] x86/mm/cpa: Generalize __set_memory_enc_pgtable() The kernel provides infrastructure to set or clear the encryption mask from the pages for AMD SEV, but TDX requires few tweaks. - TDX and SEV have different requirements to the cache and TLB flushing. - TDX has own routine to notify VMM about page encryption status change. Modify __set_memory_enc_pgtable() and make it flexible enough to cover both AMD SEV and Intel TDX. The AMD-specific behavior is isolated in the callbacks under x86_platform.guest. TDX will provide own version of said callbacks. [ bp: Beat into submission. ] Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Acked-by: Kirill A. 
Shutemov Link: https://lore.kernel.org/r/20220223043528.2093214-1-brijesh.singh@amd.com --- arch/x86/include/asm/set_memory.h | 1 - arch/x86/include/asm/x86_init.h | 16 +++++++ arch/x86/kernel/x86_init.c | 16 ++++++- arch/x86/mm/mem_encrypt_amd.c | 72 +++++++++++++++++++++---------- arch/x86/mm/pat/set_memory.c | 20 +++++---- 5 files changed, 91 insertions(+), 34 deletions(-) diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index ff0f2d90338a1..ce8dd215f5b34 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -84,7 +84,6 @@ int set_pages_rw(struct page *page, int numpages); int set_direct_map_invalid_noflush(struct page *page); int set_direct_map_default_noflush(struct page *page); bool kernel_page_present(struct page *page); -void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc); extern int kernel_set_to_readonly; diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 22b7412c08f63..e9170457697e4 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -141,6 +141,21 @@ struct x86_init_acpi { void (*reduced_hw_early_init)(void); }; +/** + * struct x86_guest - Functions used by misc guest incarnations like SEV, TDX, etc. 
+ * + * @enc_status_change_prepare Notify HV before the encryption status of a range is changed + * @enc_status_change_finish Notify HV after the encryption status of a range is changed + * @enc_tlb_flush_required Returns true if a TLB flush is needed before changing page encryption status + * @enc_cache_flush_required Returns true if a cache flush is needed before changing page encryption status + */ +struct x86_guest { + void (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc); + bool (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc); + bool (*enc_tlb_flush_required)(bool enc); + bool (*enc_cache_flush_required)(void); +}; + /** * struct x86_init_ops - functions for platform specific setup * @@ -287,6 +302,7 @@ struct x86_platform_ops { struct x86_legacy_features legacy; void (*set_legacy_features)(void); struct x86_hyper_runtime hyper; + struct x86_guest guest; }; struct x86_apic_ops { diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 7d20c1d34a3cd..e84ee5cdbd8c6 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -129,6 +129,11 @@ struct x86_cpuinit_ops x86_cpuinit = { static void default_nmi_init(void) { }; +static void enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { } +static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return false; } +static bool enc_tlb_flush_required_noop(bool enc) { return false; } +static bool enc_cache_flush_required_noop(void) { return false; } + struct x86_platform_ops x86_platform __ro_after_init = { .calibrate_cpu = native_calibrate_cpu_early, .calibrate_tsc = native_calibrate_tsc, @@ -138,9 +143,16 @@ struct x86_platform_ops x86_platform __ro_after_init = { .is_untracked_pat_range = is_ISA_range, .nmi_init = default_nmi_init, .get_nmi_reason = default_get_nmi_reason, - .save_sched_clock_state = tsc_save_sched_clock_state, - .restore_sched_clock_state = 
tsc_restore_sched_clock_state, + .save_sched_clock_state = tsc_save_sched_clock_state, + .restore_sched_clock_state = tsc_restore_sched_clock_state, .hyper.pin_vcpu = x86_op_int_noop, + + .guest = { + .enc_status_change_prepare = enc_status_change_prepare_noop, + .enc_status_change_finish = enc_status_change_finish_noop, + .enc_tlb_flush_required = enc_tlb_flush_required_noop, + .enc_cache_flush_required = enc_cache_flush_required_noop, + }, }; EXPORT_SYMBOL_GPL(x86_platform); diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index 2b2d018ea3450..6169053c28541 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -177,25 +177,6 @@ void __init sme_map_bootdata(char *real_mode_data) __sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, true); } -void __init sme_early_init(void) -{ - unsigned int i; - - if (!sme_me_mask) - return; - - early_pmd_flags = __sme_set(early_pmd_flags); - - __supported_pte_mask = __sme_set(__supported_pte_mask); - - /* Update the protection map with memory encryption mask */ - for (i = 0; i < ARRAY_SIZE(protection_map); i++) - protection_map[i] = pgprot_encrypted(protection_map[i]); - - if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) - swiotlb_force = SWIOTLB_FORCE; -} - void __init sev_setup_arch(void) { phys_addr_t total_mem = memblock_phys_mem_size(); @@ -256,7 +237,17 @@ static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot) return pfn; } -void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc) +static bool amd_enc_tlb_flush_required(bool enc) +{ + return true; +} + +static bool amd_enc_cache_flush_required(void) +{ + return !cpu_feature_enabled(X86_FEATURE_SME_COHERENT); +} + +static void enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) { #ifdef CONFIG_PARAVIRT unsigned long sz = npages << PAGE_SHIFT; @@ -287,6 +278,19 @@ void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc) #endif } 
+static void amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc) +{ +} + +/* Return true unconditionally: return value doesn't matter for the SEV side */ +static bool amd_enc_status_change_finish(unsigned long vaddr, int npages, bool enc) +{ + if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) + enc_dec_hypercall(vaddr, npages, enc); + + return true; +} + static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc) { pgprot_t old_prot, new_prot; @@ -392,7 +396,7 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr, ret = 0; - notify_range_enc_status_changed(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc); + early_set_mem_enc_dec_hypercall(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc); out: __flush_tlb_all(); return ret; @@ -410,7 +414,31 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size) void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) { - notify_range_enc_status_changed(vaddr, npages, enc); + enc_dec_hypercall(vaddr, npages, enc); +} + +void __init sme_early_init(void) +{ + unsigned int i; + + if (!sme_me_mask) + return; + + early_pmd_flags = __sme_set(early_pmd_flags); + + __supported_pte_mask = __sme_set(__supported_pte_mask); + + /* Update the protection map with memory encryption mask */ + for (i = 0; i < ARRAY_SIZE(protection_map); i++) + protection_map[i] = pgprot_encrypted(protection_map[i]); + + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) + swiotlb_force = SWIOTLB_FORCE; + + x86_platform.guest.enc_status_change_prepare = amd_enc_status_change_prepare; + x86_platform.guest.enc_status_change_finish = amd_enc_status_change_finish; + x86_platform.guest.enc_tlb_flush_required = amd_enc_tlb_flush_required; + x86_platform.guest.enc_cache_flush_required = amd_enc_cache_flush_required; } void __init mem_encrypt_free_decrypted_mem(void) diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 1441db69cea5d..3b75262cfb272 100644 --- 
a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -2008,10 +2008,12 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc) kmap_flush_unused(); vm_unmap_aliases(); - /* - * Before changing the encryption attribute, we need to flush caches. - */ - cpa_flush(&cpa, !this_cpu_has(X86_FEATURE_SME_COHERENT)); + /* Flush the caches as needed before changing the encryption attribute. */ + if (x86_platform.guest.enc_tlb_flush_required(enc)) + cpa_flush(&cpa, x86_platform.guest.enc_cache_flush_required()); + + /* Notify hypervisor that we are about to set/clr encryption attribute. */ + x86_platform.guest.enc_status_change_prepare(addr, numpages, enc); ret = __change_page_attr_set_clr(&cpa, 1); @@ -2024,11 +2026,11 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc) */ cpa_flush(&cpa, 0); - /* - * Notify hypervisor that a given memory range is mapped encrypted - * or decrypted. - */ - notify_range_enc_status_changed(addr, numpages, enc); + /* Notify hypervisor that we have successfully set/clr encryption attribute. */ + if (!ret) { + if (!x86_platform.guest.enc_status_change_finish(addr, numpages, enc)) + ret = -EIO; + } return ret; } -- GitLab From 7b75bbdf5bedebed387aac6ad8411ed1cf3db5d0 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Sun, 30 Jan 2022 22:02:03 +0100 Subject: [PATCH 0608/1586] powercap/dtpm: Change locking scheme The different functions are all called through the dtpm_create_hierarchy() which handle the mutex. The different functions are used in this context, consequently with the lock always held. Remove all locks taken in the function and add the lock in the hierarchy creation function. 
Signed-off-by: Daniel Lezcano Reviewed-by: Ulf Hansson Link: https://lore.kernel.org/r/20220130210210.549877-1-daniel.lezcano@linaro.org --- drivers/powercap/dtpm.c | 95 ++++++++++++----------------------------- 1 file changed, 27 insertions(+), 68 deletions(-) diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c index 414826a1509b6..0b0121c37a1b8 100644 --- a/drivers/powercap/dtpm.c +++ b/drivers/powercap/dtpm.c @@ -51,9 +51,7 @@ static int get_max_power_range_uw(struct powercap_zone *pcz, u64 *max_power_uw) { struct dtpm *dtpm = to_dtpm(pcz); - mutex_lock(&dtpm_lock); *max_power_uw = dtpm->power_max - dtpm->power_min; - mutex_unlock(&dtpm_lock); return 0; } @@ -83,14 +81,7 @@ static int __get_power_uw(struct dtpm *dtpm, u64 *power_uw) static int get_power_uw(struct powercap_zone *pcz, u64 *power_uw) { - struct dtpm *dtpm = to_dtpm(pcz); - int ret; - - mutex_lock(&dtpm_lock); - ret = __get_power_uw(dtpm, power_uw); - mutex_unlock(&dtpm_lock); - - return ret; + return __get_power_uw(to_dtpm(pcz), power_uw); } static void __dtpm_rebalance_weight(struct dtpm *dtpm) @@ -133,7 +124,16 @@ static void __dtpm_add_power(struct dtpm *dtpm) } } -static int __dtpm_update_power(struct dtpm *dtpm) +/** + * dtpm_update_power - Update the power on the dtpm + * @dtpm: a pointer to a dtpm structure to update + * + * Function to update the power values of the dtpm node specified in + * parameter. These new values will be propagated to the tree. + * + * Return: zero on success, -EINVAL if the values are inconsistent + */ +int dtpm_update_power(struct dtpm *dtpm) { int ret; @@ -155,26 +155,6 @@ static int __dtpm_update_power(struct dtpm *dtpm) return ret; } -/** - * dtpm_update_power - Update the power on the dtpm - * @dtpm: a pointer to a dtpm structure to update - * - * Function to update the power values of the dtpm node specified in - * parameter. These new values will be propagated to the tree. 
- * - * Return: zero on success, -EINVAL if the values are inconsistent - */ -int dtpm_update_power(struct dtpm *dtpm) -{ - int ret; - - mutex_lock(&dtpm_lock); - ret = __dtpm_update_power(dtpm); - mutex_unlock(&dtpm_lock); - - return ret; -} - /** * dtpm_release_zone - Cleanup when the node is released * @pcz: a pointer to a powercap_zone structure @@ -191,20 +171,14 @@ int dtpm_release_zone(struct powercap_zone *pcz) struct dtpm *dtpm = to_dtpm(pcz); struct dtpm *parent = dtpm->parent; - mutex_lock(&dtpm_lock); - - if (!list_empty(&dtpm->children)) { - mutex_unlock(&dtpm_lock); + if (!list_empty(&dtpm->children)) return -EBUSY; - } if (parent) list_del(&dtpm->sibling); __dtpm_sub_power(dtpm); - mutex_unlock(&dtpm_lock); - if (dtpm->ops) dtpm->ops->release(dtpm); @@ -216,23 +190,12 @@ int dtpm_release_zone(struct powercap_zone *pcz) return 0; } -static int __get_power_limit_uw(struct dtpm *dtpm, int cid, u64 *power_limit) -{ - *power_limit = dtpm->power_limit; - return 0; -} - static int get_power_limit_uw(struct powercap_zone *pcz, int cid, u64 *power_limit) { - struct dtpm *dtpm = to_dtpm(pcz); - int ret; - - mutex_lock(&dtpm_lock); - ret = __get_power_limit_uw(dtpm, cid, power_limit); - mutex_unlock(&dtpm_lock); - - return ret; + *power_limit = to_dtpm(pcz)->power_limit; + + return 0; } /* @@ -292,7 +255,7 @@ static int __set_power_limit_uw(struct dtpm *dtpm, int cid, u64 power_limit) ret = __set_power_limit_uw(child, cid, power); if (!ret) - ret = __get_power_limit_uw(child, cid, &power); + ret = get_power_limit_uw(&child->zone, cid, &power); if (ret) break; @@ -310,8 +273,6 @@ static int set_power_limit_uw(struct powercap_zone *pcz, struct dtpm *dtpm = to_dtpm(pcz); int ret; - mutex_lock(&dtpm_lock); - /* * Don't allow values outside of the power range previously * set when initializing the power numbers. 
@@ -323,8 +284,6 @@ static int set_power_limit_uw(struct powercap_zone *pcz, pr_debug("%s: power limit: %llu uW, power max: %llu uW\n", dtpm->zone.name, dtpm->power_limit, dtpm->power_max); - mutex_unlock(&dtpm_lock); - return ret; } @@ -335,11 +294,7 @@ static const char *get_constraint_name(struct powercap_zone *pcz, int cid) static int get_max_power_uw(struct powercap_zone *pcz, int id, u64 *max_power) { - struct dtpm *dtpm = to_dtpm(pcz); - - mutex_lock(&dtpm_lock); - *max_power = dtpm->power_max; - mutex_unlock(&dtpm_lock); + *max_power = to_dtpm(pcz)->power_max; return 0; } @@ -442,8 +397,6 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent) if (IS_ERR(pcz)) return PTR_ERR(pcz); - mutex_lock(&dtpm_lock); - if (parent) { list_add_tail(&dtpm->sibling, &parent->children); dtpm->parent = parent; @@ -459,8 +412,6 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent) pr_debug("Registered dtpm node '%s' / %llu-%llu uW, \n", dtpm->zone.name, dtpm->power_min, dtpm->power_max); - mutex_unlock(&dtpm_lock); - return 0; } @@ -605,8 +556,12 @@ int dtpm_create_hierarchy(struct of_device_id *dtpm_match_table) struct device_node *np; int i, ret; - if (pct) - return -EBUSY; + mutex_lock(&dtpm_lock); + + if (pct) { + ret = -EBUSY; + goto out_unlock; + } pct = powercap_register_control_type(NULL, "dtpm", NULL); if (IS_ERR(pct)) { @@ -648,12 +603,16 @@ int dtpm_create_hierarchy(struct of_device_id *dtpm_match_table) dtpm_subsys[i]->name, ret); } + mutex_unlock(&dtpm_lock); + return 0; out_err: powercap_unregister_control_type(pct); out_pct: pct = NULL; +out_unlock: + mutex_unlock(&dtpm_lock); return ret; } -- GitLab From 0aea2e4ec2a2bfa2d7e8820e37ba5b5ce04f20a5 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Sun, 30 Jan 2022 22:02:04 +0100 Subject: [PATCH 0609/1586] powercap/dtpm_cpu: Reset per_cpu variable in the release function The release function does not reset the per cpu variable when it is called. 
That will prevent creation again as the variable will already be set from the previous creation. Fix it by resetting them. Signed-off-by: Daniel Lezcano Reviewed-by: Ulf Hansson Link: https://lore.kernel.org/r/20220130210210.549877-2-daniel.lezcano@linaro.org --- drivers/powercap/dtpm_cpu.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c index eed5ad688d467..71f45d2f5a606 100644 --- a/drivers/powercap/dtpm_cpu.c +++ b/drivers/powercap/dtpm_cpu.c @@ -151,10 +151,17 @@ static int update_pd_power_uw(struct dtpm *dtpm) static void pd_release(struct dtpm *dtpm) { struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm); + struct cpufreq_policy *policy; if (freq_qos_request_active(&dtpm_cpu->qos_req)) freq_qos_remove_request(&dtpm_cpu->qos_req); + policy = cpufreq_cpu_get(dtpm_cpu->cpu); + if (policy) { + for_each_cpu(dtpm_cpu->cpu, policy->related_cpus) + per_cpu(dtpm_per_cpu, dtpm_cpu->cpu) = NULL; + } + kfree(dtpm_cpu); } -- GitLab From 690de0b4013f6f35bc9fced12746b9f396c471ae Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Sun, 30 Jan 2022 22:02:05 +0100 Subject: [PATCH 0610/1586] powercap/dtpm: Fixup kfree for virtual node When the node is virtual there is no release function associated which can free the memory. Free the memory when no 'ops' exists.
Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20220130210210.549877-3-daniel.lezcano@linaro.org --- drivers/powercap/dtpm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c index 0b0121c37a1b8..7bddd25a67670 100644 --- a/drivers/powercap/dtpm.c +++ b/drivers/powercap/dtpm.c @@ -181,12 +181,12 @@ int dtpm_release_zone(struct powercap_zone *pcz) if (dtpm->ops) dtpm->ops->release(dtpm); + else + kfree(dtpm); if (root == dtpm) root = NULL; - kfree(dtpm); - return 0; } -- GitLab From c404c64d64bc31bebe8a2015103671f7cd282731 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Sun, 30 Jan 2022 22:02:06 +0100 Subject: [PATCH 0611/1586] powercap/dtpm: Destroy hierarchy function The hierarchy creation function exists but without a destroy hierarchy function. Due to that, the modules creating the hierarchy can not be unloaded properly because they don't have an exit callback. Provide the dtpm_destroy_hierarchy() function to remove the previously created hierarchy. The function relies on all the release mechanisms implemented by the underlying powercap framework.
Signed-off-by: Daniel Lezcano Reviewed-by: Ulf Hansson Link: https://lore.kernel.org/r/20220130210210.549877-4-daniel.lezcano@linaro.org --- drivers/powercap/dtpm.c | 43 +++++++++++++++++++++++++++++++++++++++++ include/linux/dtpm.h | 3 +++ 2 files changed, 46 insertions(+) diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c index 7bddd25a67670..d9d74f9811186 100644 --- a/drivers/powercap/dtpm.c +++ b/drivers/powercap/dtpm.c @@ -617,3 +617,46 @@ out_unlock: return ret; } EXPORT_SYMBOL_GPL(dtpm_create_hierarchy); + +static void __dtpm_destroy_hierarchy(struct dtpm *dtpm) +{ + struct dtpm *child, *aux; + + list_for_each_entry_safe(child, aux, &dtpm->children, sibling) + __dtpm_destroy_hierarchy(child); + + /* + * At this point, we know all children were removed from the + * recursive call before + */ + dtpm_unregister(dtpm); +} + +void dtpm_destroy_hierarchy(void) +{ + int i; + + mutex_lock(&dtpm_lock); + + if (!pct) + goto out_unlock; + + __dtpm_destroy_hierarchy(root); + + + for (i = 0; i < ARRAY_SIZE(dtpm_subsys); i++) { + + if (!dtpm_subsys[i]->exit) + continue; + + dtpm_subsys[i]->exit(); + } + + powercap_unregister_control_type(pct); + + pct = NULL; + +out_unlock: + mutex_unlock(&dtpm_lock); +} +EXPORT_SYMBOL_GPL(dtpm_destroy_hierarchy); diff --git a/include/linux/dtpm.h b/include/linux/dtpm.h index f7a25c70dd4c0..a4a13514b7306 100644 --- a/include/linux/dtpm.h +++ b/include/linux/dtpm.h @@ -37,6 +37,7 @@ struct device_node; struct dtpm_subsys_ops { const char *name; int (*init)(void); + void (*exit)(void); int (*setup)(struct dtpm *, struct device_node *); }; @@ -67,4 +68,6 @@ void dtpm_unregister(struct dtpm *dtpm); int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent); int dtpm_create_hierarchy(struct of_device_id *dtpm_match_table); + +void dtpm_destroy_hierarchy(void); #endif -- GitLab From 4712a236db409d5ee5dccb8c7e57fe54d7d3ec66 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Sun, 30 Jan 2022 22:02:07 +0100 Subject: 
[PATCH 0612/1586] powercap/dtpm: Move the 'root' reset place The 'root' node is checked every time a dtpm node is destroyed. When we reach the end of the hierarchy destruction function, we can unconditionally set the 'root' node to NULL again. Signed-off-by: Daniel Lezcano Reviewed-by: Ulf Hansson Link: https://lore.kernel.org/r/20220130210210.549877-5-daniel.lezcano@linaro.org --- drivers/powercap/dtpm.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c index d9d74f9811186..ec931a06d90ae 100644 --- a/drivers/powercap/dtpm.c +++ b/drivers/powercap/dtpm.c @@ -184,9 +184,6 @@ int dtpm_release_zone(struct powercap_zone *pcz) else kfree(dtpm); - if (root == dtpm) - root = NULL; - return 0; } @@ -656,6 +653,8 @@ void dtpm_destroy_hierarchy(void) pct = NULL; + root = NULL; + out_unlock: mutex_unlock(&dtpm_lock); } -- GitLab From bfded2ca8f36935ff13b3b30f8e66d6135e178ac Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Sun, 30 Jan 2022 22:02:08 +0100 Subject: [PATCH 0613/1586] powercap/dtpm_cpu: Add exit function Now that we can destroy the hierarchy, the code must remove what it had put in place at the creation. In our case, the cpu hotplug callbacks.
Signed-off-by: Daniel Lezcano Reviewed-by: Ulf Hansson Link: https://lore.kernel.org/r/20220130210210.549877-6-daniel.lezcano@linaro.org --- drivers/powercap/dtpm_cpu.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c index 71f45d2f5a606..bca2f912d3496 100644 --- a/drivers/powercap/dtpm_cpu.c +++ b/drivers/powercap/dtpm_cpu.c @@ -299,8 +299,15 @@ static int dtpm_cpu_init(void) return 0; } +static void dtpm_cpu_exit(void) +{ + cpuhp_remove_state_nocalls(CPUHP_AP_ONLINE_DYN); + cpuhp_remove_state_nocalls(CPUHP_AP_DTPM_CPU_DEAD); +} + struct dtpm_subsys_ops dtpm_cpu_ops = { .name = KBUILD_MODNAME, .init = dtpm_cpu_init, + .exit = dtpm_cpu_exit, .setup = dtpm_cpu_setup, }; -- GitLab From f1ebef9e55f3c49063b575e97d2019832b8f8ef9 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Sun, 30 Jan 2022 22:02:09 +0100 Subject: [PATCH 0614/1586] dtpm/soc/rk3399: Add the ability to unload the module The dtpm hierarchy can now be removed with the dtpm_destroy_hierarchy() function. Add the module_exit() callback so the module can be unloaded by removing the previously created hierarchy. 
Signed-off-by: Daniel Lezcano Reviewed-by: Ulf Hansson Link: https://lore.kernel.org/r/20220130210210.549877-7-daniel.lezcano@linaro.org --- drivers/soc/rockchip/dtpm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/soc/rockchip/dtpm.c b/drivers/soc/rockchip/dtpm.c index ebebb748488b7..5a23784b52215 100644 --- a/drivers/soc/rockchip/dtpm.c +++ b/drivers/soc/rockchip/dtpm.c @@ -52,6 +52,12 @@ static int __init rockchip_dtpm_init(void) } module_init(rockchip_dtpm_init); +static void __exit rockchip_dtpm_exit(void) +{ + return dtpm_destroy_hierarchy(); +} +module_exit(rockchip_dtpm_exit); + MODULE_SOFTDEP("pre: panfrost cpufreq-dt"); MODULE_DESCRIPTION("Rockchip DTPM driver"); MODULE_LICENSE("GPL"); -- GitLab From 75aeaaf23def967853c8d1cfb513a6842dbc232e Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Wed, 2 Feb 2022 14:43:06 +0000 Subject: [PATCH 0615/1586] EDAC/amd64: Set memory type per DIMM Current AMD systems allow mixing of DIMM types within a system. However, DIMMs within a channel, i.e. managed by a single Unified Memory Controller (UMC), must be of the same type. Handle this possible configuration by checking and setting the memory type for each individual "UMC" structure. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Reviewed-by: William Roche Link: https://lore.kernel.org/r/20220202144307.2678405-2-yazen.ghannam@amd.com --- drivers/edac/amd64_edac.c | 43 ++++++++++++++++++++++++++++----------- drivers/edac/amd64_edac.h | 10 ++++++++- 2 files changed, 40 insertions(+), 13 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index fba609ada0e67..388b072daa94b 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1429,7 +1429,7 @@ static void __dump_misc_regs_df(struct amd64_pvt *pvt) edac_dbg(1, "UMC%d x16 DIMMs present: %s\n", i, (umc->dimm_cfg & BIT(7)) ? 
"yes" : "no"); - if (pvt->dram_type == MEM_LRDDR4) { + if (umc->dram_type == MEM_LRDDR4) { amd_smn_read(pvt->mc_node_id, umc_base + UMCCH_ADDR_CFG, &tmp); edac_dbg(1, "UMC%d LRDIMM %dx rank multiply\n", i, 1 << ((tmp >> 4) & 0x3)); @@ -1616,19 +1616,36 @@ static void read_dct_base_mask(struct amd64_pvt *pvt) } } -static void determine_memory_type(struct amd64_pvt *pvt) +static void determine_memory_type_df(struct amd64_pvt *pvt) { - u32 dram_ctrl, dcsm; + struct amd64_umc *umc; + u32 i; - if (pvt->umc) { - if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5)) - pvt->dram_type = MEM_LRDDR4; - else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4)) - pvt->dram_type = MEM_RDDR4; + for_each_umc(i) { + umc = &pvt->umc[i]; + + if (!(umc->sdp_ctrl & UMC_SDP_INIT)) { + umc->dram_type = MEM_EMPTY; + continue; + } + + if (umc->dimm_cfg & BIT(5)) + umc->dram_type = MEM_LRDDR4; + else if (umc->dimm_cfg & BIT(4)) + umc->dram_type = MEM_RDDR4; else - pvt->dram_type = MEM_DDR4; - return; + umc->dram_type = MEM_DDR4; + + edac_dbg(1, " UMC%d DIMM type: %s\n", i, edac_mem_types[umc->dram_type]); } +} + +static void determine_memory_type(struct amd64_pvt *pvt) +{ + u32 dram_ctrl, dcsm; + + if (pvt->umc) + return determine_memory_type_df(pvt); switch (pvt->fam) { case 0xf: @@ -3452,7 +3469,9 @@ skip: read_dct_base_mask(pvt); determine_memory_type(pvt); - edac_dbg(1, " DIMM type: %s\n", edac_mem_types[pvt->dram_type]); + + if (!pvt->umc) + edac_dbg(1, " DIMM type: %s\n", edac_mem_types[pvt->dram_type]); determine_ecc_sym_sz(pvt); } @@ -3548,7 +3567,7 @@ static int init_csrows_df(struct mem_ctl_info *mci) pvt->mc_node_id, cs); dimm->nr_pages = get_csrow_nr_pages(pvt, umc, cs); - dimm->mtype = pvt->dram_type; + dimm->mtype = pvt->umc[umc].dram_type; dimm->edac_mode = edac_mode; dimm->dtype = dev_type; dimm->grain = 64; diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 352bda9803f6c..6b8742369f9d8 100644 --- a/drivers/edac/amd64_edac.h +++ 
b/drivers/edac/amd64_edac.h @@ -344,6 +344,9 @@ struct amd64_umc { u32 sdp_ctrl; /* SDP Control reg */ u32 ecc_ctrl; /* DRAM ECC Control reg */ u32 umc_cap_hi; /* Capabilities High reg */ + + /* cache the dram_type */ + enum mem_type dram_type; }; struct amd64_pvt { @@ -391,7 +394,12 @@ struct amd64_pvt { /* place to store error injection parameters prior to issue */ struct error_injection injection; - /* cache the dram_type */ + /* + * cache the dram_type + * + * NOTE: Don't use this for Family 17h and later. + * Use dram_type in struct amd64_umc instead. + */ enum mem_type dram_type; struct amd64_umc *umc; /* UMC registers */ -- GitLab From 2151c84ece920dc55942495004a823cbecb921e5 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Wed, 2 Feb 2022 14:43:07 +0000 Subject: [PATCH 0616/1586] EDAC/amd64: Add new register offset support and related changes Introduce a "family flags" bitmask that can be used to indicate any special behavior needed on a per-family basis. Add a flag to indicate a system uses the new register offsets introduced with Family 19h Model 10h. Use this flag to account for register offset changes, a new bitfield indicating DDR5 use on a memory controller, and to set the proper number of chip select masks. Rework f17_addr_mask_to_cs_size() to properly handle the change in chip select masks. And update code comments to reflect the updated Chip Select, DIMM, and Mask relationships. 
[uninitialized variable warning] Reported-by: kernel test robot Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Reviewed-by: William Roche Link: https://lore.kernel.org/r/20220202144307.2678405-3-yazen.ghannam@amd.com --- drivers/edac/amd64_edac.c | 80 +++++++++++++++++++++++++++++++-------- drivers/edac/amd64_edac.h | 14 +++++++ 2 files changed, 78 insertions(+), 16 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 388b072daa94b..812baa48b2906 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -15,6 +15,21 @@ static struct msr __percpu *msrs; static struct amd64_family_type *fam_type; +static inline u32 get_umc_reg(u32 reg) +{ + if (!fam_type->flags.zn_regs_v2) + return reg; + + switch (reg) { + case UMCCH_ADDR_CFG: return UMCCH_ADDR_CFG_DDR5; + case UMCCH_ADDR_MASK_SEC: return UMCCH_ADDR_MASK_SEC_DDR5; + case UMCCH_DIMM_CFG: return UMCCH_DIMM_CFG_DDR5; + } + + WARN_ONCE(1, "%s: unknown register 0x%x", __func__, reg); + return 0; +} + /* Per-node stuff */ static struct ecc_settings **ecc_stngs; @@ -1429,8 +1444,10 @@ static void __dump_misc_regs_df(struct amd64_pvt *pvt) edac_dbg(1, "UMC%d x16 DIMMs present: %s\n", i, (umc->dimm_cfg & BIT(7)) ? "yes" : "no"); - if (umc->dram_type == MEM_LRDDR4) { - amd_smn_read(pvt->mc_node_id, umc_base + UMCCH_ADDR_CFG, &tmp); + if (umc->dram_type == MEM_LRDDR4 || umc->dram_type == MEM_LRDDR5) { + amd_smn_read(pvt->mc_node_id, + umc_base + get_umc_reg(UMCCH_ADDR_CFG), + &tmp); edac_dbg(1, "UMC%d LRDIMM %dx rank multiply\n", i, 1 << ((tmp >> 4) & 0x3)); } @@ -1505,7 +1522,7 @@ static void prep_chip_selects(struct amd64_pvt *pvt) for_each_umc(umc) { pvt->csels[umc].b_cnt = 4; - pvt->csels[umc].m_cnt = 2; + pvt->csels[umc].m_cnt = fam_type->flags.zn_regs_v2 ? 
4 : 2; } } else { @@ -1545,7 +1562,7 @@ static void read_umc_base_mask(struct amd64_pvt *pvt) } umc_mask_reg = get_umc_base(umc) + UMCCH_ADDR_MASK; - umc_mask_reg_sec = get_umc_base(umc) + UMCCH_ADDR_MASK_SEC; + umc_mask_reg_sec = get_umc_base(umc) + get_umc_reg(UMCCH_ADDR_MASK_SEC); for_each_chip_select_mask(cs, umc, pvt) { mask = &pvt->csels[umc].csmasks[cs]; @@ -1629,12 +1646,25 @@ static void determine_memory_type_df(struct amd64_pvt *pvt) continue; } - if (umc->dimm_cfg & BIT(5)) - umc->dram_type = MEM_LRDDR4; - else if (umc->dimm_cfg & BIT(4)) - umc->dram_type = MEM_RDDR4; - else - umc->dram_type = MEM_DDR4; + /* + * Check if the system supports the "DDR Type" field in UMC Config + * and has DDR5 DIMMs in use. + */ + if (fam_type->flags.zn_regs_v2 && ((umc->umc_cfg & GENMASK(2, 0)) == 0x1)) { + if (umc->dimm_cfg & BIT(5)) + umc->dram_type = MEM_LRDDR5; + else if (umc->dimm_cfg & BIT(4)) + umc->dram_type = MEM_RDDR5; + else + umc->dram_type = MEM_DDR5; + } else { + if (umc->dimm_cfg & BIT(5)) + umc->dram_type = MEM_LRDDR4; + else if (umc->dimm_cfg & BIT(4)) + umc->dram_type = MEM_RDDR4; + else + umc->dram_type = MEM_DDR4; + } edac_dbg(1, " UMC%d DIMM type: %s\n", i, edac_mem_types[umc->dram_type]); } @@ -2166,6 +2196,7 @@ static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, { u32 addr_mask_orig, addr_mask_deinterleaved; u32 msb, weight, num_zero_bits; + int cs_mask_nr = csrow_nr; int dimm, size = 0; /* No Chip Selects are enabled. */ @@ -2181,17 +2212,33 @@ static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, return size; /* - * There is one mask per DIMM, and two Chip Selects per DIMM. - * CS0 and CS1 -> DIMM0 - * CS2 and CS3 -> DIMM1 + * Family 17h introduced systems with one mask per DIMM, + * and two Chip Selects per DIMM. + * + * CS0 and CS1 -> MASK0 / DIMM0 + * CS2 and CS3 -> MASK1 / DIMM1 + * + * Family 19h Model 10h introduced systems with one mask per Chip Select, + * and two Chip Selects per DIMM. 
+ * + * CS0 -> MASK0 -> DIMM0 + * CS1 -> MASK1 -> DIMM0 + * CS2 -> MASK2 -> DIMM1 + * CS3 -> MASK3 -> DIMM1 + * + * Keep the mask number equal to the Chip Select number for newer systems, + * and shift the mask number for older systems. */ dimm = csrow_nr >> 1; + if (!fam_type->flags.zn_regs_v2) + cs_mask_nr >>= 1; + /* Asymmetric dual-rank DIMM support. */ if ((csrow_nr & 1) && (cs_mode & CS_ODD_SECONDARY)) - addr_mask_orig = pvt->csels[umc].csmasks_sec[dimm]; + addr_mask_orig = pvt->csels[umc].csmasks_sec[cs_mask_nr]; else - addr_mask_orig = pvt->csels[umc].csmasks[dimm]; + addr_mask_orig = pvt->csels[umc].csmasks[cs_mask_nr]; /* * The number of zero bits in the mask is equal to the number of bits @@ -2947,6 +2994,7 @@ static struct amd64_family_type family_types[] = { .f0_id = PCI_DEVICE_ID_AMD_19H_M10H_DF_F0, .f6_id = PCI_DEVICE_ID_AMD_19H_M10H_DF_F6, .max_mcs = 12, + .flags.zn_regs_v2 = 1, .ops = { .early_channel_count = f17_early_channel_count, .dbam_to_cs = f17_addr_mask_to_cs_size, @@ -3385,7 +3433,7 @@ static void __read_mc_regs_df(struct amd64_pvt *pvt) umc_base = get_umc_base(i); umc = &pvt->umc[i]; - amd_smn_read(nid, umc_base + UMCCH_DIMM_CFG, &umc->dimm_cfg); + amd_smn_read(nid, umc_base + get_umc_reg(UMCCH_DIMM_CFG), &umc->dimm_cfg); amd_smn_read(nid, umc_base + UMCCH_UMC_CFG, &umc->umc_cfg); amd_smn_read(nid, umc_base + UMCCH_SDP_CTRL, &umc->sdp_ctrl); amd_smn_read(nid, umc_base + UMCCH_ECC_CTRL, &umc->ecc_ctrl); diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 6b8742369f9d8..38e5ad95d0109 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -273,8 +273,11 @@ #define UMCCH_BASE_ADDR_SEC 0x10 #define UMCCH_ADDR_MASK 0x20 #define UMCCH_ADDR_MASK_SEC 0x28 +#define UMCCH_ADDR_MASK_SEC_DDR5 0x30 #define UMCCH_ADDR_CFG 0x30 +#define UMCCH_ADDR_CFG_DDR5 0x40 #define UMCCH_DIMM_CFG 0x80 +#define UMCCH_DIMM_CFG_DDR5 0x90 #define UMCCH_UMC_CFG 0x100 #define UMCCH_SDP_CTRL 0x104 #define UMCCH_ECC_CTRL 0x14C @@ 
-488,11 +491,22 @@ struct low_ops { unsigned cs_mode, int cs_mask_nr); }; +struct amd64_family_flags { + /* + * Indicates that the system supports the new register offsets, etc. + * first introduced with Family 19h Model 10h. + */ + __u64 zn_regs_v2 : 1, + + __reserved : 63; +}; + struct amd64_family_type { const char *ctl_name; u16 f0_id, f1_id, f2_id, f6_id; /* Maximum number of memory controllers per die/node. */ u8 max_mcs; + struct amd64_family_flags flags; struct low_ops ops; }; -- GitLab From 8382dce5e4835c045f33b8958a5f559d212cdd11 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Tue, 22 Feb 2022 23:34:26 +0800 Subject: [PATCH 0617/1586] cpupower: Add "perf" option to print AMD P-State information Add "-c --perf" option in cpupower-frequency-info to get the performance and frequency values for AMD P-State. Commit message amended: Shuah Khan Reviewed-by: Shuah Khan Signed-off-by: Huang Rui Signed-off-by: Shuah Khan --- .../cpupower/man/cpupower-frequency-info.1 | 3 +++ tools/power/cpupower/utils/cpufreq-info.c | 19 ++++++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/tools/power/cpupower/man/cpupower-frequency-info.1 b/tools/power/cpupower/man/cpupower-frequency-info.1 index 6aa8d239dff9d..dd545b499480b 100644 --- a/tools/power/cpupower/man/cpupower-frequency-info.1 +++ b/tools/power/cpupower/man/cpupower-frequency-info.1 @@ -53,6 +53,9 @@ human\-readable output for the \-f, \-w, \-s and \-y parameters. \fB\-n\fR \fB\-\-no-rounding\fR Output frequencies and latencies without rounding off values. .TP +\fB\-c\fR \fB\-\-perf\fR +Get performances and frequencies capabilities of CPPC, by reading it from hardware (only available on the hardware with CPPC). +.TP .SH "REMARKS" .LP By default only values of core zero are displayed. 
How to display settings of diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c index 235243ec5ce0d..0646f615fe2d4 100644 --- a/tools/power/cpupower/utils/cpufreq-info.c +++ b/tools/power/cpupower/utils/cpufreq-info.c @@ -438,6 +438,17 @@ static int get_latency(unsigned int cpu, unsigned int human) return 0; } +/* --performance / -c */ + +static int get_perf_cap(unsigned int cpu) +{ + if (cpupower_cpu_info.vendor == X86_VENDOR_AMD && + cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_PSTATE) + amd_pstate_show_perf_and_freq(cpu, no_rounding); + + return 0; +} + static void debug_output_one(unsigned int cpu) { struct cpufreq_available_frequencies *freqs; @@ -466,6 +477,7 @@ static void debug_output_one(unsigned int cpu) if (get_freq_hardware(cpu, 1) < 0) get_freq_kernel(cpu, 1); get_boost_mode(cpu); + get_perf_cap(cpu); } static struct option info_opts[] = { @@ -484,6 +496,7 @@ static struct option info_opts[] = { {"proc", no_argument, NULL, 'o'}, {"human", no_argument, NULL, 'm'}, {"no-rounding", no_argument, NULL, 'n'}, + {"performance", no_argument, NULL, 'c'}, { }, }; @@ -497,7 +510,7 @@ int cmd_freq_info(int argc, char **argv) int output_param = 0; do { - ret = getopt_long(argc, argv, "oefwldpgrasmybn", info_opts, + ret = getopt_long(argc, argv, "oefwldpgrasmybnc", info_opts, NULL); switch (ret) { case '?': @@ -520,6 +533,7 @@ int cmd_freq_info(int argc, char **argv) case 'e': case 's': case 'y': + case 'c': if (output_param) { output_param = -1; cont = 0; @@ -626,6 +640,9 @@ int cmd_freq_info(int argc, char **argv) case 'y': ret = get_latency(cpu, human); break; + case 'c': + ret = get_perf_cap(cpu); + break; } if (ret) return ret; -- GitLab From a586f944f3a30cfffdbda081aa094bc6845f5ba9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 23 Feb 2022 21:19:48 +0200 Subject: [PATCH 0618/1586] spi: pxa2xx-pci: Do not dereference fwnode in struct device In order to make the underneath API easier to change in the future, 
prevent users from dereferencing fwnode from struct device. Instead, use the specific dev_fwnode() API for that. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220223191948.31325-1-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx-pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c index 2e134eb4bd2c9..47c8cb56a4d07 100644 --- a/drivers/spi/spi-pxa2xx-pci.c +++ b/drivers/spi/spi-pxa2xx-pci.c @@ -261,7 +261,7 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev, return PTR_ERR(ssp->clk); memset(&pi, 0, sizeof(pi)); - pi.fwnode = dev->dev.fwnode; + pi.fwnode = dev_fwnode(&dev->dev); pi.parent = &dev->dev; pi.name = "pxa2xx-spi"; pi.id = ssp->port_id; -- GitLab From 609d7ffdc42199a0ec949db057e3b4be6745d6c5 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 23 Feb 2022 21:16:37 +0200 Subject: [PATCH 0619/1586] spi: pxa2xx-pci: Balance reference count for PCI DMA device The pci_get_slot() increases its reference count, the caller must decrement the reference count by calling pci_dev_put(). 
Fixes: 743485ea3bee ("spi: pxa2xx-pci: Do a specific setup in a separate function") Fixes: 25014521603f ("spi: pxa2xx-pci: Enable DMA for Intel Merrifield") Reported-by: Wang Qing Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220223191637.31147-1-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx-pci.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c index 47c8cb56a4d07..6d60972e4e207 100644 --- a/drivers/spi/spi-pxa2xx-pci.c +++ b/drivers/spi/spi-pxa2xx-pci.c @@ -76,14 +76,23 @@ static bool lpss_dma_filter(struct dma_chan *chan, void *param) return true; } +static void lpss_dma_put_device(void *dma_dev) +{ + pci_dev_put(dma_dev); +} + static int lpss_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) { struct pci_dev *dma_dev; + int ret; c->num_chipselect = 1; c->max_clk_rate = 50000000; dma_dev = pci_get_slot(dev->bus, PCI_DEVFN(PCI_SLOT(dev->devfn), 0)); + ret = devm_add_action_or_reset(&dev->dev, lpss_dma_put_device, dma_dev); + if (ret) + return ret; if (c->tx_param) { struct dw_dma_slave *slave = c->tx_param; @@ -107,8 +116,9 @@ static int lpss_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) static int mrfld_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) { - struct pci_dev *dma_dev = pci_get_slot(dev->bus, PCI_DEVFN(21, 0)); struct dw_dma_slave *tx, *rx; + struct pci_dev *dma_dev; + int ret; switch (PCI_FUNC(dev->devfn)) { case 0: @@ -133,6 +143,11 @@ static int mrfld_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) return -ENODEV; } + dma_dev = pci_get_slot(dev->bus, PCI_DEVFN(21, 0)); + ret = devm_add_action_or_reset(&dev->dev, lpss_dma_put_device, dma_dev); + if (ret) + return ret; + tx = c->tx_param; tx->dma_dev = &dma_dev->dev; -- GitLab From da3951ebdcd1cb1d5c750e08cd05aee7b0c04d9a Mon Sep 17 00:00:00 2001 From: "Jason A. 
Donenfeld" Date: Tue, 22 Feb 2022 13:46:10 +0100 Subject: [PATCH 0620/1586] random: round-robin registers as ulong, not u32 When the interrupt handler does not have a valid cycle counter, it calls get_reg() to read a register from the irq stack, in round-robin. Currently it does this assuming that registers are 32-bit. This is _probably_ the case, and probably all platforms without cycle counters are in fact 32-bit platforms. But maybe not, and either way, it's not quite correct. This commit fixes that to deal with `unsigned long` rather than `u32`. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index d73a75cbe82d6..a4dedeea35e97 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1261,15 +1261,15 @@ int random_online_cpu(unsigned int cpu) } #endif -static u32 get_reg(struct fast_pool *f, struct pt_regs *regs) +static unsigned long get_reg(struct fast_pool *f, struct pt_regs *regs) { - u32 *ptr = (u32 *)regs; + unsigned long *ptr = (unsigned long *)regs; unsigned int idx; if (regs == NULL) return 0; idx = READ_ONCE(f->reg_idx); - if (idx >= sizeof(struct pt_regs) / sizeof(u32)) + if (idx >= sizeof(struct pt_regs) / sizeof(unsigned long)) idx = 0; ptr += idx++; WRITE_ONCE(f->reg_idx, idx); -- GitLab From a3f9e8910e1584d7725ef7d5ac870920d42d0bb4 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 22 Feb 2022 14:01:57 +0100 Subject: [PATCH 0621/1586] random: only wake up writers after zap if threshold was passed The only time that we need to wake up /dev/random writers on RNDCLEARPOOL/RNDZAPPOOL is when we're changing from a value that is greater than or equal to POOL_MIN_BITS to zero, because if we're changing from below POOL_MIN_BITS to zero, the writers are already unblocked. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. 
Donenfeld --- drivers/char/random.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index a4dedeea35e97..536237a0f073b 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1582,7 +1582,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) */ if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (xchg(&input_pool.entropy_count, 0)) { + if (xchg(&input_pool.entropy_count, 0) >= POOL_MIN_BITS) { wake_up_interruptible(&random_write_wait); kill_fasync(&fasync, SIGIO, POLL_OUT); } -- GitLab From 62a40dad573406cdcb489dd45cfd201717c2e765 Mon Sep 17 00:00:00 2001 From: Oscar Shiang Date: Sun, 20 Feb 2022 20:12:00 +0800 Subject: [PATCH 0622/1586] docs: Remove duplicated words in trace/osnoise-tracer There are 2 duplicated words found in osnoise tracer documentation. This patch removes them. Signed-off-by: Oscar Shiang Acked-by: Steven Rostedt (Google) Acked-by: Daniel Bristot de Oliveira Link: https://lore.kernel.org/r/TYCP286MB1913117487F390E3BCE38B15A1399@TYCP286MB1913.JPNP286.PROD.OUTLOOK.COM Signed-off-by: Jonathan Corbet --- Documentation/trace/osnoise-tracer.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/trace/osnoise-tracer.rst b/Documentation/trace/osnoise-tracer.rst index b648cb9bf1f02..963def9f97c6e 100644 --- a/Documentation/trace/osnoise-tracer.rst +++ b/Documentation/trace/osnoise-tracer.rst @@ -51,7 +51,7 @@ For example:: [root@f32 ~]# cd /sys/kernel/tracing/ [root@f32 tracing]# echo osnoise > current_tracer -It is possible to follow the trace by reading the trace trace file:: +It is possible to follow the trace by reading the trace file:: [root@f32 tracing]# cat trace # tracer: osnoise @@ -108,7 +108,7 @@ The tracer has a set of options inside the osnoise directory, they are: option. - tracing_threshold: the minimum delta between two time() reads to be considered as noise, in us. 
When set to 0, the default value will - will be used, which is currently 5 us. + be used, which is currently 5 us. Additional Tracing ------------------ -- GitLab From a5cdaea525c32e7def563ba07d9fef9bc6edffab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Warnie=C5=82=C5=82o?= Date: Fri, 18 Feb 2022 19:16:18 +0100 Subject: [PATCH 0623/1586] scripts: kernel-doc: Add the basic POD sections MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The NAME section provides the doc title, while SYNOPSIS contains the basic syntax and usage description, which will be printed in the help document and in the error output produced on wrong script usage. The rationale is to give users simple and succinct enlightment, at the same time structuring the script internally for the maintainers. In the synopsis, Rst-only options are grouped around rst, and the rest is arranged as in the OPTIONS subsections (yet to be translated into POD, check at the end of the series). The third of the basic sections, DESCRIPTION, is added separately. Signed-off-by: Tomasz Warniełło Tested-by: Randy Dunlap Acked-by: Randy Dunlap Disliked-by: Akira Yokosawa Link: https://lore.kernel.org/r/20220218181628.1411551-2-tomasz.warniello@gmail.com Signed-off-by: Jonathan Corbet --- scripts/kernel-doc | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 3106b7536b893..c8fbf1d3d5aa9 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -16,6 +16,31 @@ use strict; ## This software falls under the GNU General Public License. ## ## Please read the COPYING file for more information ## +=head1 NAME + +kernel-doc - Print formatted kernel documentation to stdout + +=head1 SYNOPSIS + + kernel-doc [-h] [-v] [-Werror] + [ -man | + -rst [-sphinx-version VERSION] [-enable-lineno] | + -none + ] + [ + -export | + -internal | + [-function NAME] ... | + [-nosymbol NAME] ... 
+ ] + [-no-doc-sections] + [-export-file FILE] ... + FILE ... + +Run `kernel-doc -h` for details. + +=cut + # 18/01/2001 - Cleanups # Functions prototyped as foo(void) same as foo() # Stop eval'ing where we don't need to. -- GitLab From 43caf1a6823dc7c156cf38a6c71881c1e90cd3c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Warnie=C5=82=C5=82o?= Date: Fri, 18 Feb 2022 19:16:19 +0100 Subject: [PATCH 0624/1586] scripts: kernel-doc: Relink argument parsing error handling to pod2usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The former usage function is substituted, although not as the -h and -help parameter handler yet. Purpose: Use Pod::Usage to handle documentation printing in an integrated way. Signed-off-by: Tomasz Warniełło Tested-by: Randy Dunlap Acked-by: Randy Dunlap Disliked-by: Akira Yokosawa Link: https://lore.kernel.org/r/20220218181628.1411551-3-tomasz.warniello@gmail.com Signed-off-by: Jonathan Corbet --- scripts/kernel-doc | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index c8fbf1d3d5aa9..e7f7251771bb6 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -16,6 +16,8 @@ use strict; ## This software falls under the GNU General Public License. 
## ## Please read the COPYING file for more information ## +use Pod::Usage qw/pod2usage/; + =head1 NAME kernel-doc - Print formatted kernel documentation to stdout @@ -298,7 +300,13 @@ my $blankline_rst = "\n"; # read arguments if ($#ARGV == -1) { - usage(); + pod2usage( + -message => "No arguments!\n", + -exitval => 1, + -verbose => 99, + -sections => 'SYNOPSIS', + -output => \*STDERR, + ); } my $kernelversion; @@ -518,8 +526,14 @@ while ($ARGV[0] =~ m/^--?(.*)/) { die "Sphinx version should either major.minor or major.minor.patch format\n"; } } else { - # Unknown argument - usage(); + # Unknown argument + pod2usage( + -message => "Argument unknown!\n", + -exitval => 1, + -verbose => 99, + -sections => 'SYNOPSIS', + -output => \*STDERR, + ); } } -- GitLab From f1583922bf9383ce0079dfdded959dfc5585dc5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Warnie=C5=82=C5=82o?= Date: Fri, 18 Feb 2022 19:16:20 +0100 Subject: [PATCH 0625/1586] scripts: kernel-doc: Translate the DESCRIPTION section MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Transition the description section into POD. This is one of the standard documentation sections. This adjustment makes the section available for POD and makes it look better. Notes: - an article addition - paragraphing correction Signed-off-by: Tomasz Warniełło Tested-by: Randy Dunlap Acked-by: Randy Dunlap Disliked-by: Akira Yokosawa Link: https://lore.kernel.org/r/20220218181628.1411551-4-tomasz.warniello@gmail.com Signed-off-by: Jonathan Corbet --- scripts/kernel-doc | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index e7f7251771bb6..e4203f13fa93f 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -41,6 +41,15 @@ kernel-doc - Print formatted kernel documentation to stdout Run `kernel-doc -h` for details. 
+=head1 DESCRIPTION + +Read C language source or header FILEs, extract embedded documentation comments, +and print formatted documentation to standard output. + +The documentation comments are identified by the "/**" opening comment mark. + +See Documentation/doc-guide/kernel-doc.rst for the documentation comment syntax. + =cut # 18/01/2001 - Cleanups @@ -72,12 +81,6 @@ sub usage { my $message = <<"EOF"; Usage: $0 [OPTION ...] FILE ... -Read C language source or header FILEs, extract embedded documentation comments, -and print formatted documentation to standard output. - -The documentation comments are identified by "/**" opening comment mark. See -Documentation/doc-guide/kernel-doc.rst for the documentation comment syntax. - Output format selection (mutually exclusive): -man Output troff manual page format. This is the default. -rst Output reStructuredText format. -- GitLab From 2875f78708219feadf0956dcf9e936ec25fb7a8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Warnie=C5=82=C5=82o?= Date: Fri, 18 Feb 2022 19:16:21 +0100 Subject: [PATCH 0626/1586] scripts: kernel-doc: Translate the "Output format selection" subsection of OPTIONS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Another step in the direction of a uniform POD documentation, which will make users happier. Options land at the end of the script, not to clutter the file top. The default output format is corrected to rst. That's what it is now. A POD delimiting comment is added to the script head, which improves the script logical structure. 
Signed-off-by: Tomasz Warniełło Tested-by: Randy Dunlap Acked-by: Randy Dunlap Disliked-by: Akira Yokosawa Link: https://lore.kernel.org/r/20220218181628.1411551-5-tomasz.warniello@gmail.com Signed-off-by: Jonathan Corbet --- scripts/kernel-doc | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index e4203f13fa93f..18eca172c4b5e 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -52,6 +52,8 @@ See Documentation/doc-guide/kernel-doc.rst for the documentation comment syntax. =cut +# more perldoc at the end of the file + # 18/01/2001 - Cleanups # Functions prototyped as foo(void) same as foo() # Stop eval'ing where we don't need to. @@ -81,11 +83,6 @@ sub usage { my $message = <<"EOF"; Usage: $0 [OPTION ...] FILE ... -Output format selection (mutually exclusive): - -man Output troff manual page format. This is the default. - -rst Output reStructuredText format. - -none Do not output documentation, only warnings. - Output format selection modifier (affects only ReST output): -sphinx-version Use the ReST C domain dialect compatible with an @@ -2563,3 +2560,27 @@ if ($Werror && $warnings) { } else { exit($output_mode eq "none" ? 0 : $errors) } + +__END__ + +=head1 OPTIONS + +=head2 Output format selection (mutually exclusive): + +=over 8 + +=item -man + +Output troff manual page format. + +=item -rst + +Output reStructuredText format. This is the default. + +=item -none + +Do not output documentation, only warnings. 
+ +=back + +=cut -- GitLab From dd803b04b0a0af16e43c2af1a3e67d7ce8e1f899 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Warnie=C5=82=C5=82o?= Date: Fri, 18 Feb 2022 19:16:22 +0100 Subject: [PATCH 0627/1586] scripts: kernel-doc: Translate the "Output format selection modifier" subsection of OPTIONS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Aim: unified POD, user more happy This section is renamed to "Output format modifiers" to make it simple. To make it even more simple, a subsection is added: "reStructuredText only". Other notes: - paragraphing correction - article correction Signed-off-by: Tomasz Warniełło Tested-by: Randy Dunlap Acked-by: Randy Dunlap Disliked-by: Akira Yokosawa Link: https://lore.kernel.org/r/20220218181628.1411551-6-tomasz.warniello@gmail.com Signed-off-by: Jonathan Corbet --- scripts/kernel-doc | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 18eca172c4b5e..b926faa16b008 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -83,13 +83,6 @@ sub usage { my $message = <<"EOF"; Usage: $0 [OPTION ...] FILE ... -Output format selection modifier (affects only ReST output): - - -sphinx-version Use the ReST C domain dialect compatible with an - specific Sphinx Version. - If not specified, kernel-doc will auto-detect using - the sphinx-build version found on PATH. - Output selection (mutually exclusive): -export Only output documentation for symbols that have been exported using EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL() @@ -2583,4 +2576,19 @@ Do not output documentation, only warnings. =back +=head2 Output format modifiers + +=head3 reStructuredText only + +=over 8 + +=item -sphinx-version VERSION + +Use the ReST C domain dialect compatible with a specific Sphinx Version. + +If not specified, kernel-doc will auto-detect using the sphinx-build version +found on PATH. 
+ +=back + =cut -- GitLab From 9c77f108f43ae08e560f54c817d4aeb4857dc783 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Warnie=C5=82=C5=82o?= Date: Fri, 18 Feb 2022 19:16:23 +0100 Subject: [PATCH 0628/1586] scripts: kernel-doc: Translate the "Output selection" subsection of OPTIONS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Aim: unified POD, user more satisfied, script better structured The plurals in -function and -nosymbol are corrected to singulars. That's how the script works now. I think this describes the syntax better. The plural suggests multiple FILE arguments might be possible. So this seems more coherent. Other notes: - paragraphing correction - article correction Signed-off-by: Tomasz Warniełło Tested-by: Randy Dunlap Acked-by: Randy Dunlap Disliked-by: Akira Yokosawa Link: https://lore.kernel.org/r/20220218181628.1411551-7-tomasz.warniello@gmail.com Signed-off-by: Jonathan Corbet --- scripts/kernel-doc | 42 +++++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index b926faa16b008..e49cdb307a35d 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -83,19 +83,6 @@ sub usage { my $message = <<"EOF"; Usage: $0 [OPTION ...] FILE ... -Output selection (mutually exclusive): - -export Only output documentation for symbols that have been - exported using EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL() - in any input FILE or -export-file FILE. - -internal Only output documentation for symbols that have NOT been - exported using EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL() - in any input FILE or -export-file FILE. - -function NAME Only output documentation for the given function(s) - or DOC: section title(s). All other functions and DOC: - sections are ignored. May be specified multiple times. - -nosymbol NAME Exclude the specified symbols from the output - documentation. May be specified multiple times.
- Output selection modifiers: -no-doc-sections Do not output DOC: sections. -enable-lineno Enable output of #define LINENO lines. Only works with @@ -2591,4 +2578,33 @@ found on PATH. =back +=head2 Output selection (mutually exclusive): + +=over 8 + +=item -export + +Only output documentation for the symbols that have been exported using +EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL() in any input FILE or -export-file FILE. + +=item -internal + +Only output documentation for the symbols that have NOT been exported using +EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL() in any input FILE or -export-file FILE. + +=item -function NAME + +Only output documentation for the given function or DOC: section title. +All other functions and DOC: sections are ignored. + +May be specified multiple times. + +=item -nosymbol NAME + +Exclude the specified symbol from the output documentation. + +May be specified multiple times. + +=back + =cut -- GitLab From c15de5a19a2881205f6f893869584c99cbe4fae4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Warnie=C5=82=C5=82o?= Date: Fri, 18 Feb 2022 19:16:24 +0100 Subject: [PATCH 0629/1586] scripts: kernel-doc: Translate the "Output selection modifiers" subsection of OPTIONS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Aim: unified POD, user more satisfied, script better structured A subsection "reStructuredText only" is added for -enable-lineno. 
Other notes: - paragraphing correction Signed-off-by: Tomasz Warniełło Tested-by: Randy Dunlap Acked-by: Randy Dunlap Disliked-by: Akira Yokosawa Link: https://lore.kernel.org/r/20220218181628.1411551-8-tomasz.warniello@gmail.com Signed-off-by: Jonathan Corbet --- scripts/kernel-doc | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index e49cdb307a35d..210e7e3b501b6 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -83,14 +83,6 @@ sub usage { my $message = <<"EOF"; Usage: $0 [OPTION ...] FILE ... -Output selection modifiers: - -no-doc-sections Do not output DOC: sections. - -enable-lineno Enable output of #define LINENO lines. Only works with - reStructuredText format. - -export-file FILE Specify an additional FILE in which to look for - EXPORT_SYMBOL() and EXPORT_SYMBOL_GPL(). To be used with - -export or -internal. May be specified multiple times. - Other parameters: -v Verbose output, more warnings and other information. -h Print this help. @@ -2607,4 +2599,33 @@ May be specified multiple times. =back +=head2 Output selection modifiers: + +=over 8 + +=item -no-doc-sections + +Do not output DOC: sections. + +=item -export-file FILE + +Specify an additional FILE in which to look for EXPORT_SYMBOL() and +EXPORT_SYMBOL_GPL(). + +To be used with -export or -internal. + +May be specified multiple times. + +=back + +=head3 reStructuredText only + +=over 8 + +=item -enable-lineno + +Enable output of #define LINENO lines. 
+ +=back + =cut -- GitLab From 834cf6b9039e6f6ebd73cc4da51cc8bc802ca777 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Warnie=C5=82=C5=82o?= Date: Fri, 18 Feb 2022 19:16:25 +0100 Subject: [PATCH 0630/1586] scripts: kernel-doc: Translate the "Other parameters" subsection of OPTIONS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Aim: unified POD, user more satisfied, script better structured Notes: - The -help token is added. - The entries are sorted alphabetically. Signed-off-by: Tomasz Warniełło Tested-by: Randy Dunlap Acked-by: Randy Dunlap Disliked-by: Akira Yokosawa Link: https://lore.kernel.org/r/20220218181628.1411551-9-tomasz.warniello@gmail.com Signed-off-by: Jonathan Corbet --- scripts/kernel-doc | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 210e7e3b501b6..4a26a74318e68 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -83,11 +83,6 @@ sub usage { my $message = <<"EOF"; Usage: $0 [OPTION ...] FILE ... -Other parameters: - -v Verbose output, more warnings and other information. - -h Print this help. - -Werror Treat warnings as errors. - EOF print $message; exit 1; @@ -2628,4 +2623,22 @@ Enable output of #define LINENO lines. =back +=head2 Other parameters: + +=over 8 + +=item -h, -help + +Print this help. + +=item -v + +Verbose output, more warnings and other information. + +=item -Werror + +Treat warnings as errors.
+ +=back + =cut -- GitLab From 252b47da9fd9eeebbdaed448aea71010261d7dc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Warnie=C5=82=C5=82o?= Date: Fri, 18 Feb 2022 19:16:26 +0100 Subject: [PATCH 0631/1586] scripts: kernel-doc: Replace the usage function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Aim: unified POD, user more satisfied, script better structured You can see the results with: $ scripts/kernel-doc -help Signed-off-by: Tomasz Warniełło Tested-by: Randy Dunlap Acked-by: Randy Dunlap Disliked-by: Akira Yokosawa Link: https://lore.kernel.org/r/20220218181628.1411551-10-tomasz.warniello@gmail.com Signed-off-by: Jonathan Corbet --- scripts/kernel-doc | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 4a26a74318e68..d7ca4877eedad 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -79,15 +79,6 @@ See Documentation/doc-guide/kernel-doc.rst for the documentation comment syntax. # 25/07/2012 - Added support for HTML5 # -- Dan Luedtke -sub usage { - my $message = <<"EOF"; -Usage: $0 [OPTION ...] FILE ... - -EOF - print $message; - exit 1; -} - # # format of comments. # In the following table, (...)? signifies optional structure. @@ -468,7 +459,7 @@ while ($ARGV[0] =~ m/^--?(.*)/) { } elsif ($cmd eq "Werror") { $Werror = 1; } elsif (($cmd eq "h") || ($cmd eq "help")) { - usage(); + pod2usage(-exitval => 0, -verbose => 2); } elsif ($cmd eq 'no-doc-sections') { $no_doc_sections = 1; } elsif ($cmd eq 'enable-lineno') { -- GitLab From 258092a89085ed9536da00f27d8ddbe083c9ea0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Warnie=C5=82=C5=82o?= Date: Fri, 18 Feb 2022 19:16:27 +0100 Subject: [PATCH 0632/1586] scripts: kernel-doc: Drop obsolete comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit What for? To improve the script maintainability. 1. 
License As stated by Jonathan Corbet in the reply to my version 1, the SPDX line is enough. 2. The to-do list comment As suggested by Jonathan Corbet in reply to my version 3, this section doesn't need to be transitioned. And so it is removed for clarity. 3. The historical changelog comments As suggested by Jonathan Corbet in a reply to v3, this section can go. I wanted to keep it, but since it doesn't contain copyright notices, let's just have it clean and simple. 4. The "format of comments" comment block As suggested by Jani Nikula in a reply to my first version of this transformation, Documentation/doc-guide/kernel-doc.rst can serve as the information hub for comment formatting. The section DESCRIPTION already points there, so the original comment block can just be removed. Suggested-by: Jonathan Corbet Suggested-by: Jani Nikula Signed-off-by: Tomasz Warniełło Tested-by: Randy Dunlap Acked-by: Randy Dunlap Disliked-by: Akira Yokosawa Link: https://lore.kernel.org/r/20220218181628.1411551-11-tomasz.warniello@gmail.com Signed-off-by: Jonathan Corbet --- scripts/kernel-doc | 143 --------------------------------------------- 1 file changed, 143 deletions(-) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index d7ca4877eedad..a5a397e22ea7b 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -12,9 +12,6 @@ use strict; ## ## ## #define enhancements by Armin Kuster ## ## Copyright (c) 2000 MontaVista Software, Inc. ## -## ## -## This software falls under the GNU General Public License. ## -## Please read the COPYING file for more information ## use Pod::Usage qw/pod2usage/; @@ -54,146 +51,6 @@ See Documentation/doc-guide/kernel-doc.rst for the documentation comment syntax. # more perldoc at the end of the file -# 18/01/2001 - Cleanups -# Functions prototyped as foo(void) same as foo() -# Stop eval'ing where we don't need to. -# -- huggie@earth.li - -# 27/06/2001 - Allowed whitespace after initial "/**" and -# allowed comments before function declarations. 
-# -- Christian Kreibich - -# Still to do: -# - add perldoc documentation -# - Look more closely at some of the scarier bits :) - -# 26/05/2001 - Support for separate source and object trees. -# Return error code. -# Keith Owens - -# 23/09/2001 - Added support for typedefs, structs, enums and unions -# Support for Context section; can be terminated using empty line -# Small fixes (like spaces vs. \s in regex) -# -- Tim Jansen - -# 25/07/2012 - Added support for HTML5 -# -- Dan Luedtke - -# -# format of comments. -# In the following table, (...)? signifies optional structure. -# (...)* signifies 0 or more structure elements -# /** -# * function_name(:)? (- short description)? -# (* @parameterx: (description of parameter x)?)* -# (* a blank line)? -# * (Description:)? (Description of function)? -# * (section header: (section description)? )* -# (*)?*/ -# -# So .. the trivial example would be: -# -# /** -# * my_function -# */ -# -# If the Description: header tag is omitted, then there must be a blank line -# after the last parameter specification. -# e.g. -# /** -# * my_function - does my stuff -# * @my_arg: its mine damnit -# * -# * Does my stuff explained. -# */ -# -# or, could also use: -# /** -# * my_function - does my stuff -# * @my_arg: its mine damnit -# * Description: Does my stuff explained. -# */ -# etc. -# -# Besides functions you can also write documentation for structs, unions, -# enums and typedefs. Instead of the function name you must write the name -# of the declaration; the struct/union/enum/typedef must always precede -# the name. Nesting of declarations is not supported. -# Use the argument mechanism to document members or constants. -# e.g. -# /** -# * struct my_struct - short description -# * @a: first member -# * @b: second member -# * -# * Longer description -# */ -# struct my_struct { -# int a; -# int b; -# /* private: */ -# int c; -# }; -# -# All descriptions can be multiline, except the short function description. 
-# -# For really longs structs, you can also describe arguments inside the -# body of the struct. -# eg. -# /** -# * struct my_struct - short description -# * @a: first member -# * @b: second member -# * -# * Longer description -# */ -# struct my_struct { -# int a; -# int b; -# /** -# * @c: This is longer description of C -# * -# * You can use paragraphs to describe arguments -# * using this method. -# */ -# int c; -# }; -# -# This should be use only for struct/enum members. -# -# You can also add additional sections. When documenting kernel functions you -# should document the "Context:" of the function, e.g. whether the functions -# can be called form interrupts. Unlike other sections you can end it with an -# empty line. -# A non-void function should have a "Return:" section describing the return -# value(s). -# Example-sections should contain the string EXAMPLE so that they are marked -# appropriately in DocBook. -# -# Example: -# /** -# * user_function - function that can only be called in user context -# * @a: some argument -# * Context: !in_interrupt() -# * -# * Some description -# * Example: -# * user_function(22); -# */ -# ... -# -# -# All descriptive text is further processed, scanning for the following special -# patterns, which are highlighted appropriately. -# -# 'funcname()' - function -# '$ENVVAR' - environmental variable -# '&struct_name' - name of a structure (up to two words including 'struct') -# '&struct_name.member' - name of a structure member -# '@parameter' - name of a parameter -# '%CONST' - name of a constant. -# '``LITERAL``' - literal string without any spaces on it. 
- ## init lots of data my $errors = 0; -- GitLab From 2b306ecaf57b2b5004dcb671a46ef24a1c369db2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Warnie=C5=82=C5=82o?= Date: Fri, 18 Feb 2022 19:16:28 +0100 Subject: [PATCH 0633/1586] scripts: kernel-doc: Refresh the copyright lines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I wanted to clean up these lines, but in the end decided not to touch the old ones and just add my own about POD. I'll leave the cleanup for lawyers. Signed-off-by: Tomasz Warniełło Tested-by: Randy Dunlap Acked-by: Randy Dunlap Disliked-by: Akira Yokosawa Link: https://lore.kernel.org/r/20220218181628.1411551-12-tomasz.warniello@gmail.com Signed-off-by: Jonathan Corbet --- scripts/kernel-doc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index a5a397e22ea7b..f06f68f3c3d9b 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -12,6 +12,8 @@ use strict; ## ## ## #define enhancements by Armin Kuster ## ## Copyright (c) 2000 MontaVista Software, Inc. ## +# +# Copyright (C) 2022 Tomasz Warniełło (POD) use Pod::Usage qw/pod2usage/; -- GitLab From e334f873eb4e1638dd0b45200d2d8838a13b0cac Mon Sep 17 00:00:00 2001 From: Akira Yokosawa Date: Thu, 24 Feb 2022 22:02:46 +0900 Subject: [PATCH 0634/1586] docs: scripts/kernel-doc: Detect absence of FILE arg Currently, when there is no FILE argument following a switch such as -man, -rst, or -none, kernel-doc exits with a warning from perl (long msg folded): Use of uninitialized value $ARGV[0] in pattern match (m//) at ./scripts/kernel-doc line 438. , which is unhelpful. Improve the behavior by adding a check at the bottom of parsing loop. If the argument is absent, display help text and exit with the code of 1 (via usage()). 
Signed-off-by: Akira Yokosawa Cc: Randy Dunlap Link: https://lore.kernel.org/r/7b136049-a3ba-0eb5-8717-364d773ff914@gmail.com [jc: reworked to fix conflict with pod patches] Signed-off-by: Jonathan Corbet --- scripts/kernel-doc | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index f06f68f3c3d9b..9c084a2ba3b05 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -343,14 +343,23 @@ while ($ARGV[0] =~ m/^--?(.*)/) { die "Sphinx version should either major.minor or major.minor.patch format\n"; } } else { - # Unknown argument - pod2usage( - -message => "Argument unknown!\n", - -exitval => 1, - -verbose => 99, - -sections => 'SYNOPSIS', - -output => \*STDERR, - ); + # Unknown argument + pod2usage( + -message => "Argument unknown!\n", + -exitval => 1, + -verbose => 99, + -sections => 'SYNOPSIS', + -output => \*STDERR, + ); + } + if ($#ARGV < 0){ + pod2usage( + -message => "FILE argument missing\n", + -exitval => 1, + -verbose => 99, + -sections => 'SYNOPSIS', + -output => \*STDERR, + ); } } -- GitLab From 024314d6d540a24cfe029421ad7e97e1d6e886b2 Mon Sep 17 00:00:00 2001 From: Yixuan Cao Date: Wed, 23 Feb 2022 21:41:04 +0800 Subject: [PATCH 0635/1586] Documentation/vm/page_owner.rst: fix language There are some words that need to be fixed. Thanks for Shuah Khan's constructive suggestions. The text has been fixed as follows. a. So, if you'd like to use it, you need to add "page_owner=on" into your boot cmdline. Here, "into" has been replaced with "to". b. ...page owner is disabled in runtime due to no enabling, boot option, runtime overhead is marginal. Here, "no" has been replaced with "not". 
Signed-off-by: Yixuan Cao Acked-by: Randy Dunlap Link: https://lore.kernel.org/r/20220223134104.2663-1-caoyixuan2019@email.szu.edu.cn Signed-off-by: Jonathan Corbet --- Documentation/vm/page_owner.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/vm/page_owner.rst b/Documentation/vm/page_owner.rst index 9837fc8147dd6..bc28edaf3de1f 100644 --- a/Documentation/vm/page_owner.rst +++ b/Documentation/vm/page_owner.rst @@ -26,9 +26,9 @@ fragmentation statistics can be obtained through gfp flag information of each page. It is already implemented and activated if page owner is enabled. Other usages are more than welcome. -page owner is disabled in default. So, if you'd like to use it, you need -to add "page_owner=on" into your boot cmdline. If the kernel is built -with page owner and page owner is disabled in runtime due to no enabling +page owner is disabled by default. So, if you'd like to use it, you need +to add "page_owner=on" to your boot cmdline. If the kernel is built +with page owner and page owner is disabled in runtime due to not enabling boot option, runtime overhead is marginal. If disabled in runtime, it doesn't require memory to store owner information, so there is no runtime memory overhead. And, page owner inserts just two unlikely branches into -- GitLab From b089f167c384c7ca0adc50c78dd7a98f747d5e03 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 22 Feb 2022 10:27:51 +0900 Subject: [PATCH 0636/1586] Documentation: block/diskstats: update function names __make_request() and end_that_request_last() do no longer exist. Replace them with the current call-site. 
Signed-off-by: Naohiro Aota Reviewed-by: Chaitanya Kulkarni Reviewed-by: Jens Axboe Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220222012751.1933194-1-naohiro.aota@wdc.com Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/iostats.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/admin-guide/iostats.rst b/Documentation/admin-guide/iostats.rst index 9b14b0c2c9c45..609a3201fd4e1 100644 --- a/Documentation/admin-guide/iostats.rst +++ b/Documentation/admin-guide/iostats.rst @@ -76,7 +76,7 @@ Field 3 -- # of sectors read (unsigned long) Field 4 -- # of milliseconds spent reading (unsigned int) This is the total number of milliseconds spent by all reads (as - measured from __make_request() to end_that_request_last()). + measured from blk_mq_alloc_request() to __blk_mq_end_request()). Field 5 -- # of writes completed (unsigned long) This is the total number of writes completed successfully. @@ -89,7 +89,7 @@ Field 7 -- # of sectors written (unsigned long) Field 8 -- # of milliseconds spent writing (unsigned int) This is the total number of milliseconds spent by all writes (as - measured from __make_request() to end_that_request_last()). + measured from blk_mq_alloc_request() to __blk_mq_end_request()). Field 9 -- # of I/Os currently in progress (unsigned int) The only field that should go to zero. Incremented as requests are @@ -120,7 +120,7 @@ Field 14 -- # of sectors discarded (unsigned long) Field 15 -- # of milliseconds spent discarding (unsigned int) This is the total number of milliseconds spent by all discards (as - measured from __make_request() to end_that_request_last()). + measured from blk_mq_alloc_request() to __blk_mq_end_request()). Field 16 -- # of flush requests completed This is the total number of flush requests completed successfully.
-- GitLab From 398f7abdcb7e2307facebcbdae5639f7d35916cd Mon Sep 17 00:00:00 2001 From: Akira Yokosawa Date: Fri, 18 Feb 2022 23:11:17 +0900 Subject: [PATCH 0637/1586] docs: pdfdocs: Pull LaTeX preamble part out of conf.py Quote from Jon's remark [1]: I do notice that Documentation/conf.py is getting large and unapproachable. At some future point, it might be nice to pull all of the latex stuff out into a separate file where it won't scare people who stumble into it by accident. Pull LaTeX preamble settings added since commit 3b4c963243b1 ("docs: conf.py: adjust the LaTeX document output") out into sphinx/kerneldoc-preamble.sty. It will be copied to the build directory by the added "latex_additional_files" setting in conf.py. As a bonus, LaTeX/TeX code can be maintained without escaping backslashes. To compensate the loss of change history in sphinx/kerneldoc-preamble.sty, here is a list of changes made in conf.py: - f7ebe6b76940 ("docs: Activate exCJK only in CJK chapters") - 0afd4df0d16a ("docs: pdfdocs: Prevent column squeezing by tabulary") - 659653c9e546 ("docs: pdfdocs: Refactor config for CJK document") - e291ff6f5a03 ("docs: pdfdocs: Add CJK-language-specific font settings") - 7eb368cc319b ("docs: pdfdocs: Choose Serif font as CJK mainfont if possible") - 35382965bdd2 ("docs: pdfdocs: Preserve inter-phrase space in Korean translations") - 77abc2c230b1 ("docs: pdfdocs: One-half spacing for CJK translations") - 788d28a25799 ("docs: pdfdocs: Permit AutoFakeSlant for CJK fonts") - 29ac9822358f ("docs: pdfdocs: Teach xeCJK about character classes of quotation marks") - 7c5c18bdb656 ("docs: pdfdocs: Fix typo in CJK-language specific font settings") - aa872e0647dc ("docs: pdfdocs: Adjust \headheight for fancyhdr") - 8716ef413aa5 ("docs: pdfdocs: Tweak width params of TOC") - 66939df53948 ("docs: pdfdocs: Switch default CJK font to KR variants") - 7b686a2ea1e4 ("docs: pdfdocs: Enable CJKspace in TOC for Korean titles") - 5d9158e3c762 ("docs/translations: Skip CJK 
contents if suitable fonts not found") - b774cc46313b ("docs: pdfdocs: Move CJK monospace font setting to main conf.py") [1]: https://lore.kernel.org/all/87zgmr66cn.fsf@meer.lwn.net/ Suggested-by: Jonathan Corbet Signed-off-by: Akira Yokosawa Link: https://lore.kernel.org/r/aaa9dca1-27c0-c414-77f3-c5587db0cc5b@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/conf.py | 184 ++------------------ Documentation/sphinx/kerneldoc-preamble.sty | 184 ++++++++++++++++++++ 2 files changed, 197 insertions(+), 171 deletions(-) create mode 100644 Documentation/sphinx/kerneldoc-preamble.sty diff --git a/Documentation/conf.py b/Documentation/conf.py index fb8f69fc4d38c..072ee31a301dc 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -409,188 +409,25 @@ latex_elements = { # Additional stuff for the LaTeX preamble. 'preamble': ''' - % Custom width parameters for TOC --- Redefine low-level commands - % defined in report.cls - \\makeatletter - %% Redefine \\@pnumwidth (page number width) - \\renewcommand*\\@pnumwidth{2.7em} - %% Redefine \\l@chapter (chapter list entry) - \\renewcommand*\\l@chapter[2]{% - \\ifnum \\c@tocdepth >\\m@ne - \\addpenalty{-\\@highpenalty}% - \\vskip 1.0em \\@plus\\p@ - \\setlength\\@tempdima{1.8em}% - \\begingroup - \\parindent \\z@ \\rightskip \\@pnumwidth - \\parfillskip -\\@pnumwidth - \\leavevmode \\bfseries - \\advance\\leftskip\\@tempdima - \\hskip -\\leftskip - #1\\nobreak\\hfil - \\nobreak\\hb@xt@\\@pnumwidth{\\hss #2% - \\kern-\\p@\\kern\\p@}\\par - \\penalty\\@highpenalty - \\endgroup - \\fi} - %% Redefine \\l@section and \\l@subsection - \\renewcommand*\\l@section{\\@dottedtocline{1}{1.8em}{3.2em}} - \\renewcommand*\\l@subsection{\\@dottedtocline{2}{5em}{4.3em}} - \\makeatother - %% Sphinx < 1.8 doesn't have \\sphinxtableofcontentshook - \\providecommand{\\sphinxtableofcontentshook}{} - %% Undefine it for compatibility with Sphinx 1.7.9 - \\renewcommand{\\sphinxtableofcontentshook}{} % Empty the hook - % Prevent column 
squeezing of tabulary. - \\setlength{\\tymin}{20em} % Use some font with UTF-8 support with XeLaTeX \\usepackage{fontspec} \\setsansfont{DejaVu Sans} \\setromanfont{DejaVu Serif} \\setmonofont{DejaVu Sans Mono} - % Adjust \\headheight for fancyhdr - \\addtolength{\\headheight}{1.6pt} - \\addtolength{\\topmargin}{-1.6pt} - ''', + ''', } -# Translations have Asian (CJK) characters which are only displayed if -# xeCJK is used - -latex_elements['preamble'] += ''' - \\IfFontExistsTF{Noto Sans CJK SC}{ - % This is needed for translations - \\usepackage{xeCJK} - \\IfFontExistsTF{Noto Serif CJK KR}{ - \\setCJKmainfont{Noto Serif CJK KR}[AutoFakeSlant] - }{ - \\setCJKmainfont{Noto Sans CJK KR}[AutoFakeSlant] - } - \\setCJKsansfont{Noto Sans CJK KR}[AutoFakeSlant] - \\setCJKmonofont{Noto Sans Mono CJK KR}[AutoFakeSlant] - \\xeCJKDeclareCharClass{HalfLeft}{`“,`‘} - \\xeCJKDeclareCharClass{HalfRight}{`”,`’} - % CJK Language-specific font choices - \\IfFontExistsTF{Noto Serif CJK SC}{ - \\newCJKfontfamily[SCmain]\\scmain{Noto Serif CJK SC}[AutoFakeSlant] - \\newCJKfontfamily[SCserif]\\scserif{Noto Serif CJK SC}[AutoFakeSlant] - }{ - \\newCJKfontfamily[SCmain]\\scmain{Noto Sans CJK SC}[AutoFakeSlant] - \\newCJKfontfamily[SCserif]\\scserif{Noto Sans CJK SC}[AutoFakeSlant] - } - \\newCJKfontfamily[SCsans]\\scsans{Noto Sans CJK SC}[AutoFakeSlant] - \\newCJKfontfamily[SCmono]\\scmono{Noto Sans Mono CJK SC}[AutoFakeSlant] - \\IfFontExistsTF{Noto Serif CJK TC}{ - \\newCJKfontfamily[TCmain]\\tcmain{Noto Serif CJK TC}[AutoFakeSlant] - \\newCJKfontfamily[TCserif]\\tcserif{Noto Serif CJK TC}[AutoFakeSlant] - }{ - \\newCJKfontfamily[TCmain]\\tcmain{Noto Sans CJK TC}[AutoFakeSlant] - \\newCJKfontfamily[TCserif]\\tcserif{Noto Sans CJK TC}[AutoFakeSlant] - } - \\newCJKfontfamily[TCsans]\\tcsans{Noto Sans CJK TC}[AutoFakeSlant] - \\newCJKfontfamily[TCmono]\\tcmono{Noto Sans Mono CJK TC}[AutoFakeSlant] - \\IfFontExistsTF{Noto Serif CJK KR}{ - \\newCJKfontfamily[KRmain]\\krmain{Noto Serif CJK 
KR}[AutoFakeSlant] - \\newCJKfontfamily[KRserif]\\krserif{Noto Serif CJK KR}[AutoFakeSlant] - }{ - \\newCJKfontfamily[KRmain]\\krmain{Noto Sans CJK KR}[AutoFakeSlant] - \\newCJKfontfamily[KRserif]\\krserif{Noto Sans CJK KR}[AutoFakeSlant] - } - \\newCJKfontfamily[KRsans]\\krsans{Noto Sans CJK KR}[AutoFakeSlant] - \\newCJKfontfamily[KRmono]\\krmono{Noto Sans Mono CJK KR}[AutoFakeSlant] - \\IfFontExistsTF{Noto Serif CJK JP}{ - \\newCJKfontfamily[JPmain]\\jpmain{Noto Serif CJK JP}[AutoFakeSlant] - \\newCJKfontfamily[JPserif]\\jpserif{Noto Serif CJK JP}[AutoFakeSlant] - }{ - \\newCJKfontfamily[JPmain]\\jpmain{Noto Sans CJK JP}[AutoFakeSlant] - \\newCJKfontfamily[JPserif]\\jpserif{Noto Sans CJK JP}[AutoFakeSlant] - } - \\newCJKfontfamily[JPsans]\\jpsans{Noto Sans CJK JP}[AutoFakeSlant] - \\newCJKfontfamily[JPmono]\\jpmono{Noto Sans Mono CJK JP}[AutoFakeSlant] - % Dummy commands for Sphinx < 2.3 (no 'extrapackages' support) - \\providecommand{\\onehalfspacing}{} - \\providecommand{\\singlespacing}{} - % Define custom macros to on/off CJK - \\newcommand{\\kerneldocCJKon}{\\makexeCJKactive\\onehalfspacing} - \\newcommand{\\kerneldocCJKoff}{\\makexeCJKinactive\\singlespacing} - \\newcommand{\\kerneldocBeginSC}{% - \\begingroup% - \\scmain% - \\xeCJKDeclareCharClass{FullLeft}{`“,`‘}% - \\xeCJKDeclareCharClass{FullRight}{`”,`’}% - \\renewcommand{\\CJKrmdefault}{SCserif}% - \\renewcommand{\\CJKsfdefault}{SCsans}% - \\renewcommand{\\CJKttdefault}{SCmono}% - \\xeCJKsetup{CJKspace = false}% - % For CJK ascii-art alignment - \\setmonofont{Noto Sans Mono CJK SC}[AutoFakeSlant]% - } - \\newcommand{\\kerneldocEndSC}{\\endgroup} - \\newcommand{\\kerneldocBeginTC}{% - \\begingroup% - \\tcmain% - \\xeCJKDeclareCharClass{FullLeft}{`“,`‘}% - \\xeCJKDeclareCharClass{FullRight}{`”,`’}% - \\renewcommand{\\CJKrmdefault}{TCserif}% - \\renewcommand{\\CJKsfdefault}{TCsans}% - \\renewcommand{\\CJKttdefault}{TCmono}% - \\xeCJKsetup{CJKspace = false}% - % For CJK ascii-art alignment - 
\\setmonofont{Noto Sans Mono CJK TC}[AutoFakeSlant]% - } - \\newcommand{\\kerneldocEndTC}{\\endgroup} - \\newcommand{\\kerneldocBeginKR}{% - \\begingroup% - \\krmain% - \\renewcommand{\\CJKrmdefault}{KRserif}% - \\renewcommand{\\CJKsfdefault}{KRsans}% - \\renewcommand{\\CJKttdefault}{KRmono}% - % \\xeCJKsetup{CJKspace = true} % true by default - % For CJK ascii-art alignment (still misaligned for Hangul) - \\setmonofont{Noto Sans Mono CJK KR}[AutoFakeSlant]% - } - \\newcommand{\\kerneldocEndKR}{\\endgroup} - \\newcommand{\\kerneldocBeginJP}{% - \\begingroup% - \\jpmain% - \\renewcommand{\\CJKrmdefault}{JPserif}% - \\renewcommand{\\CJKsfdefault}{JPsans}% - \\renewcommand{\\CJKttdefault}{JPmono}% - \\xeCJKsetup{CJKspace = false}% - % For CJK ascii-art alignment - \\setmonofont{Noto Sans Mono CJK JP}[AutoFakeSlant]% - } - \\newcommand{\\kerneldocEndJP}{\\endgroup} - % Single spacing in literal blocks - \\fvset{baselinestretch=1} - % To customize \\sphinxtableofcontents - \\usepackage{etoolbox} - % Inactivate CJK after tableofcontents - \\apptocmd{\\sphinxtableofcontents}{\\kerneldocCJKoff}{}{} - \\xeCJKsetup{CJKspace = true} % For inter-phrase space of Korean TOC - }{ % No CJK font found - % Custom macros to on/off CJK (Dummy) - \\newcommand{\\kerneldocCJKon}{} - \\newcommand{\\kerneldocCJKoff}{} - \\newcommand{\\kerneldocBeginSC}[1]{% - \\begin{sphinxadmonition}{note}{Note:} - ``Noto Sans CJK'' fonts are not found while building this PDF\\@. - Translations of zh\\_CN, zh\\_TW, ko\\_KR, and ja\\_JP are - skipped. 
- \\end{sphinxadmonition}} - \\newcommand{\\kerneldocEndSC}{} - \\newcommand{\\kerneldocBeginTC}[1]{} - \\newcommand{\\kerneldocEndTC}{} - \\newcommand{\\kerneldocBeginKR}[1]{} - \\newcommand{\\kerneldocEndKR}{} - \\newcommand{\\kerneldocBeginJP}[1]{} - \\newcommand{\\kerneldocEndJP}{} - } -''' - # Fix reference escape troubles with Sphinx 1.4.x if major == 1: latex_elements['preamble'] += '\\renewcommand*{\\DUrole}[2]{ #2 }\n' + +# Load kerneldoc specific LaTeX settings +latex_elements['preamble'] += ''' + % Load kerneldoc specific LaTeX settings + \\input{kerneldoc-preamble.sty} +''' + # With Sphinx 1.6, it is possible to change the Bg color directly # by using: # \definecolor{sphinxnoteBgColor}{RGB}{204,255,255} @@ -652,6 +489,11 @@ for fn in os.listdir('.'): # If false, no module index is generated. #latex_domain_indices = True +# Additional LaTeX stuff to be copied to build directory +latex_additional_files = [ + 'sphinx/kerneldoc-preamble.sty', +] + # -- Options for manual page output --------------------------------------- diff --git a/Documentation/sphinx/kerneldoc-preamble.sty b/Documentation/sphinx/kerneldoc-preamble.sty new file mode 100644 index 0000000000000..4e56ccea1dbd7 --- /dev/null +++ b/Documentation/sphinx/kerneldoc-preamble.sty @@ -0,0 +1,184 @@ +% -*- coding: utf-8 -*- +% SPDX-License-Identifier: GPL-2.0 +% +% LaTeX preamble for "make latexdocs" or "make pdfdocs" including: +% - TOC width settings +% - Setting of tabulary (\tymin) +% - Headheight setting for fancyhdr +% - Fontfamily settings for CJK (Chinese, Japanese, and Korean) translations +% +% Note on the suffix of .sty: +% This is not implemented as a LaTeX style file, but as a file containing +% plain LaTeX code to be included into preamble. +% ".sty" is chosen because ".tex" would cause the build scripts to confuse +% this file with a LaTeX main file. 
+% +% Copyright (C) 2022 Akira Yokosawa + +% Custom width parameters for TOC --- Redefine low-level commands +% defined in report.cls +\makeatletter +%% Redefine \@pnumwidth (page number width) +\renewcommand*\@pnumwidth{2.7em} +%% Redefine \l@chapter (chapter list entry) +\renewcommand*\l@chapter[2]{% + \ifnum \c@tocdepth >\m@ne + \addpenalty{-\@highpenalty}% + \vskip 1.0em \@plus\p@ + \setlength\@tempdima{1.8em}% + \begingroup + \parindent \z@ \rightskip \@pnumwidth + \parfillskip -\@pnumwidth + \leavevmode \bfseries + \advance\leftskip\@tempdima + \hskip -\leftskip + #1\nobreak\hfil + \nobreak\hb@xt@\@pnumwidth{\hss #2% + \kern-\p@\kern\p@}\par + \penalty\@highpenalty + \endgroup + \fi} +%% Redefine \l@section and \l@subsection +\renewcommand*\l@section{\@dottedtocline{1}{1.8em}{3.2em}} +\renewcommand*\l@subsection{\@dottedtocline{2}{5em}{4.3em}} +\makeatother +%% Sphinx < 1.8 doesn't have \sphinxtableofcontentshook +\providecommand{\sphinxtableofcontentshook}{} +%% Undefine it for compatibility with Sphinx 1.7.9 +\renewcommand{\sphinxtableofcontentshook}{} % Empty the hook +% Prevent column squeezing of tabulary. 
+\setlength{\tymin}{20em} + +% Adjust \headheight for fancyhdr +\addtolength{\headheight}{1.6pt} +\addtolength{\topmargin}{-1.6pt} + +% Translations have Asian (CJK) characters which are only displayed if +% xeCJK is used +\IfFontExistsTF{Noto Sans CJK SC}{ + % This is needed for translations + \usepackage{xeCJK} + \IfFontExistsTF{Noto Serif CJK KR}{ + \setCJKmainfont{Noto Serif CJK KR}[AutoFakeSlant] + }{ + \setCJKmainfont{Noto Sans CJK KR}[AutoFakeSlant] + } + \setCJKsansfont{Noto Sans CJK KR}[AutoFakeSlant] + \setCJKmonofont{Noto Sans Mono CJK KR}[AutoFakeSlant] + \xeCJKDeclareCharClass{HalfLeft}{`“,`‘} + \xeCJKDeclareCharClass{HalfRight}{`”,`’} + % CJK Language-specific font choices + \IfFontExistsTF{Noto Serif CJK SC}{ + \newCJKfontfamily[SCmain]\scmain{Noto Serif CJK SC}[AutoFakeSlant] + \newCJKfontfamily[SCserif]\scserif{Noto Serif CJK SC}[AutoFakeSlant] + }{ + \newCJKfontfamily[SCmain]\scmain{Noto Sans CJK SC}[AutoFakeSlant] + \newCJKfontfamily[SCserif]\scserif{Noto Sans CJK SC}[AutoFakeSlant] + } + \newCJKfontfamily[SCsans]\scsans{Noto Sans CJK SC}[AutoFakeSlant] + \newCJKfontfamily[SCmono]\scmono{Noto Sans Mono CJK SC}[AutoFakeSlant] + \IfFontExistsTF{Noto Serif CJK TC}{ + \newCJKfontfamily[TCmain]\tcmain{Noto Serif CJK TC}[AutoFakeSlant] + \newCJKfontfamily[TCserif]\tcserif{Noto Serif CJK TC}[AutoFakeSlant] + }{ + \newCJKfontfamily[TCmain]\tcmain{Noto Sans CJK TC}[AutoFakeSlant] + \newCJKfontfamily[TCserif]\tcserif{Noto Sans CJK TC}[AutoFakeSlant] + } + \newCJKfontfamily[TCsans]\tcsans{Noto Sans CJK TC}[AutoFakeSlant] + \newCJKfontfamily[TCmono]\tcmono{Noto Sans Mono CJK TC}[AutoFakeSlant] + \IfFontExistsTF{Noto Serif CJK KR}{ + \newCJKfontfamily[KRmain]\krmain{Noto Serif CJK KR}[AutoFakeSlant] + \newCJKfontfamily[KRserif]\krserif{Noto Serif CJK KR}[AutoFakeSlant] + }{ + \newCJKfontfamily[KRmain]\krmain{Noto Sans CJK KR}[AutoFakeSlant] + \newCJKfontfamily[KRserif]\krserif{Noto Sans CJK KR}[AutoFakeSlant] + } + \newCJKfontfamily[KRsans]\krsans{Noto Sans 
CJK KR}[AutoFakeSlant] + \newCJKfontfamily[KRmono]\krmono{Noto Sans Mono CJK KR}[AutoFakeSlant] + \IfFontExistsTF{Noto Serif CJK JP}{ + \newCJKfontfamily[JPmain]\jpmain{Noto Serif CJK JP}[AutoFakeSlant] + \newCJKfontfamily[JPserif]\jpserif{Noto Serif CJK JP}[AutoFakeSlant] + }{ + \newCJKfontfamily[JPmain]\jpmain{Noto Sans CJK JP}[AutoFakeSlant] + \newCJKfontfamily[JPserif]\jpserif{Noto Sans CJK JP}[AutoFakeSlant] + } + \newCJKfontfamily[JPsans]\jpsans{Noto Sans CJK JP}[AutoFakeSlant] + \newCJKfontfamily[JPmono]\jpmono{Noto Sans Mono CJK JP}[AutoFakeSlant] + % Dummy commands for Sphinx < 2.3 (no 'extrapackages' support) + \providecommand{\onehalfspacing}{} + \providecommand{\singlespacing}{} + % Define custom macros to on/off CJK + \newcommand{\kerneldocCJKon}{\makexeCJKactive\onehalfspacing} + \newcommand{\kerneldocCJKoff}{\makexeCJKinactive\singlespacing} + \newcommand{\kerneldocBeginSC}{% + \begingroup% + \scmain% + \xeCJKDeclareCharClass{FullLeft}{`“,`‘}% + \xeCJKDeclareCharClass{FullRight}{`”,`’}% + \renewcommand{\CJKrmdefault}{SCserif}% + \renewcommand{\CJKsfdefault}{SCsans}% + \renewcommand{\CJKttdefault}{SCmono}% + \xeCJKsetup{CJKspace = false}% + % For CJK ascii-art alignment + \setmonofont{Noto Sans Mono CJK SC}[AutoFakeSlant]% + } + \newcommand{\kerneldocEndSC}{\endgroup} + \newcommand{\kerneldocBeginTC}{% + \begingroup% + \tcmain% + \xeCJKDeclareCharClass{FullLeft}{`“,`‘}% + \xeCJKDeclareCharClass{FullRight}{`”,`’}% + \renewcommand{\CJKrmdefault}{TCserif}% + \renewcommand{\CJKsfdefault}{TCsans}% + \renewcommand{\CJKttdefault}{TCmono}% + \xeCJKsetup{CJKspace = false}% + % For CJK ascii-art alignment + \setmonofont{Noto Sans Mono CJK TC}[AutoFakeSlant]% + } + \newcommand{\kerneldocEndTC}{\endgroup} + \newcommand{\kerneldocBeginKR}{% + \begingroup% + \krmain% + \renewcommand{\CJKrmdefault}{KRserif}% + \renewcommand{\CJKsfdefault}{KRsans}% + \renewcommand{\CJKttdefault}{KRmono}% + % \xeCJKsetup{CJKspace = true} % true by default + % For CJK ascii-art 
alignment (still misaligned for Hangul) + \setmonofont{Noto Sans Mono CJK KR}[AutoFakeSlant]% + } + \newcommand{\kerneldocEndKR}{\endgroup} + \newcommand{\kerneldocBeginJP}{% + \begingroup% + \jpmain% + \renewcommand{\CJKrmdefault}{JPserif}% + \renewcommand{\CJKsfdefault}{JPsans}% + \renewcommand{\CJKttdefault}{JPmono}% + \xeCJKsetup{CJKspace = false}% + % For CJK ascii-art alignment + \setmonofont{Noto Sans Mono CJK JP}[AutoFakeSlant]% + } + \newcommand{\kerneldocEndJP}{\endgroup} + % Single spacing in literal blocks + \fvset{baselinestretch=1} + % To customize \sphinxtableofcontents + \usepackage{etoolbox} + % Inactivate CJK after tableofcontents + \apptocmd{\sphinxtableofcontents}{\kerneldocCJKoff}{}{} + \xeCJKsetup{CJKspace = true} % For inter-phrase space of Korean TOC +}{ % No CJK font found + % Custom macros to on/off CJK (Dummy) + \newcommand{\kerneldocCJKon}{} + \newcommand{\kerneldocCJKoff}{} + \newcommand{\kerneldocBeginSC}[1]{% + \begin{sphinxadmonition}{note}{Note:} + ``Noto Sans CJK'' fonts are not found while building this PDF\@. + Translations of zh\_CN, zh\_TW, ko\_KR, and ja\_JP are skipped. + \end{sphinxadmonition}} + \newcommand{\kerneldocEndSC}{} + \newcommand{\kerneldocBeginTC}[1]{} + \newcommand{\kerneldocEndTC}{} + \newcommand{\kerneldocBeginKR}[1]{} + \newcommand{\kerneldocEndKR}{} + \newcommand{\kerneldocBeginJP}[1]{} + \newcommand{\kerneldocEndJP}{} +} -- GitLab From 7cee33ce237cef25cbd782783a7e0241ef6d0776 Mon Sep 17 00:00:00 2001 From: Akira Yokosawa Date: Fri, 18 Feb 2022 23:13:41 +0900 Subject: [PATCH 0638/1586] docs: kerneldoc-preamble.sty: Expand comments in LaTeX code Expand comments in LaTeX code and mention some of important points told in changelogs of conf.py changes. Hopefully they can help future contributors in this area. No code change involved. 
Signed-off-by: Akira Yokosawa Cc: Jonathan Corbet Link: https://lore.kernel.org/r/bce9261b-1950-3146-07b2-07bd2ec79158@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/sphinx/kerneldoc-preamble.sty | 61 ++++++++++++++++----- 1 file changed, 48 insertions(+), 13 deletions(-) diff --git a/Documentation/sphinx/kerneldoc-preamble.sty b/Documentation/sphinx/kerneldoc-preamble.sty index 4e56ccea1dbd7..5e59900e994d9 100644 --- a/Documentation/sphinx/kerneldoc-preamble.sty +++ b/Documentation/sphinx/kerneldoc-preamble.sty @@ -15,8 +15,20 @@ % % Copyright (C) 2022 Akira Yokosawa -% Custom width parameters for TOC --- Redefine low-level commands -% defined in report.cls +% Custom width parameters for TOC +% - Redefine low-level commands defined in report.cls. +% - Indent of 2 chars is preserved for ease of comparison. +% Summary of changes from default params: +% Width of page number (\@pnumwidth): 1.55em -> 2.7em +% Width of chapter number: 1.5em -> 1.8em +% Indent of section number: 1.5em -> 1.8em +% Width of section number: 2.6em -> 3.2em +% Indent of sebsection number: 4.1em -> 5em +% Width of subsection number: 3.5em -> 4.3em +% +% These params can have 4 digit page counts, 2 digit chapter counts, +% section counts of 4 digits + 1 period (e.g., 18.10), and subsection counts +% of 5 digits + 2 periods (e.g., 18.7.13). \makeatletter %% Redefine \@pnumwidth (page number width) \renewcommand*\@pnumwidth{2.7em} @@ -46,7 +58,10 @@ \providecommand{\sphinxtableofcontentshook}{} %% Undefine it for compatibility with Sphinx 1.7.9 \renewcommand{\sphinxtableofcontentshook}{} % Empty the hook -% Prevent column squeezing of tabulary. + +% Prevent column squeezing of tabulary. \tymin is set by Sphinx as: +% \setlength{\tymin}{3\fontcharwd\font`0 } +% , which is too short. 
\setlength{\tymin}{20em} % Adjust \headheight for fancyhdr @@ -56,8 +71,12 @@ % Translations have Asian (CJK) characters which are only displayed if % xeCJK is used \IfFontExistsTF{Noto Sans CJK SC}{ - % This is needed for translations + % Load xeCJK when CJK font is available \usepackage{xeCJK} + % Noto CJK fonts don't provide slant shape. [AutoFakeSlant] permits + % its emulation. + % Select KR variant at the beginning of each document so that quotation + % and apostorph symbols of half-width is used in TOC of Latin documents. \IfFontExistsTF{Noto Serif CJK KR}{ \setCJKmainfont{Noto Serif CJK KR}[AutoFakeSlant] }{ @@ -65,9 +84,11 @@ } \setCJKsansfont{Noto Sans CJK KR}[AutoFakeSlant] \setCJKmonofont{Noto Sans Mono CJK KR}[AutoFakeSlant] + % Teach xeCJK of half-width symbols \xeCJKDeclareCharClass{HalfLeft}{`“,`‘} \xeCJKDeclareCharClass{HalfRight}{`”,`’} % CJK Language-specific font choices + %% for Simplified Chinese \IfFontExistsTF{Noto Serif CJK SC}{ \newCJKfontfamily[SCmain]\scmain{Noto Serif CJK SC}[AutoFakeSlant] \newCJKfontfamily[SCserif]\scserif{Noto Serif CJK SC}[AutoFakeSlant] @@ -77,6 +98,7 @@ } \newCJKfontfamily[SCsans]\scsans{Noto Sans CJK SC}[AutoFakeSlant] \newCJKfontfamily[SCmono]\scmono{Noto Sans Mono CJK SC}[AutoFakeSlant] + %% for Traditional Chinese \IfFontExistsTF{Noto Serif CJK TC}{ \newCJKfontfamily[TCmain]\tcmain{Noto Serif CJK TC}[AutoFakeSlant] \newCJKfontfamily[TCserif]\tcserif{Noto Serif CJK TC}[AutoFakeSlant] @@ -86,6 +108,7 @@ } \newCJKfontfamily[TCsans]\tcsans{Noto Sans CJK TC}[AutoFakeSlant] \newCJKfontfamily[TCmono]\tcmono{Noto Sans Mono CJK TC}[AutoFakeSlant] + %% for Korean \IfFontExistsTF{Noto Serif CJK KR}{ \newCJKfontfamily[KRmain]\krmain{Noto Serif CJK KR}[AutoFakeSlant] \newCJKfontfamily[KRserif]\krserif{Noto Serif CJK KR}[AutoFakeSlant] @@ -95,6 +118,7 @@ } \newCJKfontfamily[KRsans]\krsans{Noto Sans CJK KR}[AutoFakeSlant] \newCJKfontfamily[KRmono]\krmono{Noto Sans Mono CJK KR}[AutoFakeSlant] + %% for Japanese 
\IfFontExistsTF{Noto Serif CJK JP}{ \newCJKfontfamily[JPmain]\jpmain{Noto Serif CJK JP}[AutoFakeSlant] \newCJKfontfamily[JPserif]\jpserif{Noto Serif CJK JP}[AutoFakeSlant] @@ -108,34 +132,39 @@ \providecommand{\onehalfspacing}{} \providecommand{\singlespacing}{} % Define custom macros to on/off CJK + %% One and half spacing for CJK contents \newcommand{\kerneldocCJKon}{\makexeCJKactive\onehalfspacing} \newcommand{\kerneldocCJKoff}{\makexeCJKinactive\singlespacing} + % Define custom macros for switching CJK font setting + %% for Simplified Chinese \newcommand{\kerneldocBeginSC}{% \begingroup% \scmain% - \xeCJKDeclareCharClass{FullLeft}{`“,`‘}% - \xeCJKDeclareCharClass{FullRight}{`”,`’}% + \xeCJKDeclareCharClass{FullLeft}{`“,`‘}% Full-width in SC + \xeCJKDeclareCharClass{FullRight}{`”,`’}% Full-width in SC \renewcommand{\CJKrmdefault}{SCserif}% \renewcommand{\CJKsfdefault}{SCsans}% \renewcommand{\CJKttdefault}{SCmono}% - \xeCJKsetup{CJKspace = false}% + \xeCJKsetup{CJKspace = false}% gobble white spaces by ' ' % For CJK ascii-art alignment \setmonofont{Noto Sans Mono CJK SC}[AutoFakeSlant]% } \newcommand{\kerneldocEndSC}{\endgroup} + %% for Traditional Chinese \newcommand{\kerneldocBeginTC}{% \begingroup% \tcmain% - \xeCJKDeclareCharClass{FullLeft}{`“,`‘}% - \xeCJKDeclareCharClass{FullRight}{`”,`’}% + \xeCJKDeclareCharClass{FullLeft}{`“,`‘}% Full-width in TC + \xeCJKDeclareCharClass{FullRight}{`”,`’}% Full-width in TC \renewcommand{\CJKrmdefault}{TCserif}% \renewcommand{\CJKsfdefault}{TCsans}% \renewcommand{\CJKttdefault}{TCmono}% - \xeCJKsetup{CJKspace = false}% + \xeCJKsetup{CJKspace = false}% gobble white spaces by ' ' % For CJK ascii-art alignment \setmonofont{Noto Sans Mono CJK TC}[AutoFakeSlant]% } \newcommand{\kerneldocEndTC}{\endgroup} + %% for Korean \newcommand{\kerneldocBeginKR}{% \begingroup% \krmain% @@ -147,29 +176,35 @@ \setmonofont{Noto Sans Mono CJK KR}[AutoFakeSlant]% } \newcommand{\kerneldocEndKR}{\endgroup} + %% for Japanese 
\newcommand{\kerneldocBeginJP}{% \begingroup% \jpmain% \renewcommand{\CJKrmdefault}{JPserif}% \renewcommand{\CJKsfdefault}{JPsans}% \renewcommand{\CJKttdefault}{JPmono}% - \xeCJKsetup{CJKspace = false}% + \xeCJKsetup{CJKspace = false}% gobble white space by ' ' % For CJK ascii-art alignment \setmonofont{Noto Sans Mono CJK JP}[AutoFakeSlant]% } \newcommand{\kerneldocEndJP}{\endgroup} + % Single spacing in literal blocks \fvset{baselinestretch=1} % To customize \sphinxtableofcontents \usepackage{etoolbox} % Inactivate CJK after tableofcontents \apptocmd{\sphinxtableofcontents}{\kerneldocCJKoff}{}{} - \xeCJKsetup{CJKspace = true} % For inter-phrase space of Korean TOC + \xeCJKsetup{CJKspace = true}% For inter-phrase space of Korean TOC }{ % No CJK font found - % Custom macros to on/off CJK (Dummy) + % Custom macros to on/off CJK and switch CJK fonts (Dummy) \newcommand{\kerneldocCJKon}{} \newcommand{\kerneldocCJKoff}{} + %% By defining \kerneldocBegin(SC|TC|KR|JP) as commands with an argument + %% and ignore the argument (#1) in their definitions, whole contents of + %% CJK chapters can be ignored. \newcommand{\kerneldocBeginSC}[1]{% + %% Put a note on missing CJK fonts in place of zh_CN translation. \begin{sphinxadmonition}{note}{Note:} ``Noto Sans CJK'' fonts are not found while building this PDF\@. Translations of zh\_CN, zh\_TW, ko\_KR, and ja\_JP are skipped. -- GitLab From 96c7f3b53cc34d8d11ce0b8c6baabba1c5ee16e0 Mon Sep 17 00:00:00 2001 From: Akira Yokosawa Date: Fri, 18 Feb 2022 23:26:34 +0900 Subject: [PATCH 0639/1586] Reword note on missing CJK fonts Use past tense as the fonts can be installed after the fact. Add suggestion to install "Noto Sans CJK" and "Noto Serif CJK" font families. ("Noto Serif CJK" is optional.) 
Signed-off-by: Akira Yokosawa Cc: Jonathan Corbet Link: https://lore.kernel.org/r/dfefa601-c58d-c86c-953f-5e4454db9409@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/sphinx/kerneldoc-preamble.sty | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/Documentation/sphinx/kerneldoc-preamble.sty b/Documentation/sphinx/kerneldoc-preamble.sty index 5e59900e994d9..9d0204dc38be2 100644 --- a/Documentation/sphinx/kerneldoc-preamble.sty +++ b/Documentation/sphinx/kerneldoc-preamble.sty @@ -205,9 +205,16 @@ %% CJK chapters can be ignored. \newcommand{\kerneldocBeginSC}[1]{% %% Put a note on missing CJK fonts in place of zh_CN translation. - \begin{sphinxadmonition}{note}{Note:} - ``Noto Sans CJK'' fonts are not found while building this PDF\@. - Translations of zh\_CN, zh\_TW, ko\_KR, and ja\_JP are skipped. + \begin{sphinxadmonition}{note}{Note on missing fonts:} + Translations of Simplified Chinese (zh\_CN), Traditional Chinese + (zh\_TW), Korean (ko\_KR), and Japanese (ja\_JP) were skipped + due to the lack of suitable font families. + + If you want them, please install ``Noto Sans CJK'' font families + by following instructions from + \sphinxcode{./scripts/sphinx-pre-install}. + Having optional ``Noto Serif CJK'' font families will improve + the looks of those translations. \end{sphinxadmonition}} \newcommand{\kerneldocEndSC}{} \newcommand{\kerneldocBeginTC}[1]{} -- GitLab From 18ab307823bb643fc985d316448f2d70eb1cb7c3 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 24 Feb 2022 12:35:07 -0700 Subject: [PATCH 0640/1586] docs: fix RST error in vm/page_owner.rst Commit f7df2b1cf03a ("tools/vm/page_owner_sort.c: count and sort by mem") added a literal text block without the necessary markup, leading to these warnings in the docs build: Documentation/vm/page_owner.rst:92: WARNING: Unexpected indentation. Documentation/vm/page_owner.rst:96: WARNING: Unexpected indentation. 
Documentation/vm/page_owner.rst:107: WARNING: Unexpected indentation. Add the necessary colons and make the build quieter. Signed-off-by: Jonathan Corbet --- Documentation/vm/page_owner.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/vm/page_owner.rst b/Documentation/vm/page_owner.rst index bc28edaf3de1f..905555e3e4836 100644 --- a/Documentation/vm/page_owner.rst +++ b/Documentation/vm/page_owner.rst @@ -85,7 +85,7 @@ Usage cat /sys/kernel/debug/page_owner > page_owner_full.txt ./page_owner_sort page_owner_full.txt sorted_page_owner.txt - The general output of ``page_owner_full.txt`` is as follows: + The general output of ``page_owner_full.txt`` is as follows:: Page allocated via order XXX, ... PFN XXX ... @@ -100,7 +100,7 @@ Usage and pages of buf, and finally sorts them according to the times. See the result about who allocated each page - in the ``sorted_page_owner.txt``. General output: + in the ``sorted_page_owner.txt``. General output:: XXX times, XXX pages: Page allocated via order XXX, ... -- GitLab From 1ecf393fc5a5962ebbe8d011dede6cab880f349b Mon Sep 17 00:00:00 2001 From: Thorsten Leemhuis Date: Wed, 16 Feb 2022 07:51:33 +0100 Subject: [PATCH 0641/1586] docs: add two documents about regression handling Create two documents explaining various aspects around regression handling and tracking; one is aimed at users, the other targets developers. The texts among others describes the first rule of Linux kernel development and what it means in practice. They also explain what a regression actually is and how to report one properly. Both texts additionally provide a brief introduction to the bot the kernel's regression tracker uses to facilitate the work, but mention the use is optional. To sum things up, provide a few quotes from Linus in the document for developers to show how serious we take regressions. 
Signed-off-by: Thorsten Leemhuis Link: https://lore.kernel.org/r/34e56d3588f22d7e0b4d635ef9c9c3b33ca4ac04.1644994117.git.linux@leemhuis.info Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/index.rst | 1 + .../admin-guide/reporting-regressions.rst | 439 ++++++++++++ .../process/handling-regressions.rst | 659 ++++++++++++++++++ Documentation/process/index.rst | 1 + MAINTAINERS | 2 + 5 files changed, 1102 insertions(+) create mode 100644 Documentation/admin-guide/reporting-regressions.rst create mode 100644 Documentation/process/handling-regressions.rst diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst index 1bedab498104a..5bfafcbb9562c 100644 --- a/Documentation/admin-guide/index.rst +++ b/Documentation/admin-guide/index.rst @@ -35,6 +35,7 @@ problems and bugs in particular. :maxdepth: 1 reporting-issues + reporting-regressions security-bugs bug-hunting bug-bisect diff --git a/Documentation/admin-guide/reporting-regressions.rst b/Documentation/admin-guide/reporting-regressions.rst new file mode 100644 index 0000000000000..6fbd24ceb3bfe --- /dev/null +++ b/Documentation/admin-guide/reporting-regressions.rst @@ -0,0 +1,439 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0) +.. [see the bottom of this file for redistribution information] + +Reporting regressions ++++++++++++++++++++++ + +"*We don't cause regressions*" is the first rule of Linux kernel development; +Linux founder and lead developer Linus Torvalds established it himself and +ensures it's obeyed. + +This document describes what the rule means for users and how the Linux kernel's +development model ensures to address all reported regressions; aspects relevant +for kernel developers are left to Documentation/process/handling-regressions.rst. + + +The important bits (aka "TL;DR") +================================ + +#. It's a regression if something running fine with one Linux kernel works worse + or not at all with a newer version. 
Note, the newer kernel has to be compiled + using a similar configuration; the detailed explanations below describe this + and other fine print in more detail.
This can be achieved +using ``make olddefconfig``, as explained in more detail below. + +Note the "practical use case" in the first sentence of this section: developers +despite the "no regressions" rule are free to change any aspect of the kernel +and even APIs or ABIs to userland, as long as no existing application or use +case breaks. + +Also be aware the "no regressions" rule covers only interfaces the kernel +provides to the userland. It thus does not apply to kernel-internal interfaces +like the module API, which some externally developed drivers use to hook into +the kernel. + +How do I report a regression? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Just report the issue as outlined in +Documentation/admin-guide/reporting-issues.rst, it already describes the +important points. The following aspects outlined there are especially relevant +for regressions: + + * When checking for existing reports to join, also search the `archives of the + Linux regressions mailing list `_ and + `regzbot's web-interface `_. + + * Start your report's subject with "[REGRESSION]". + + * In your report, clearly mention the last kernel version that worked fine and + the first broken one. Ideally try to find the exact change causing the + regression using a bisection, as explained below in more detail. + + * Remember to let the Linux regressions mailing list + (regressions@lists.linux.dev) know about your report: + + * If you report the regression by mail, CC the regressions list. + + * If you report your regression to some bug tracker, forward the submitted + report by mail to the regressions list while CCing the maintainer and the + mailing list for the subsystem in question. + + If it's a regression within a stable or longterm series (e.g. + v5.15.3..v5.15.5), remember to CC the `Linux stable mailing list + `_ (stable@vger.kernel.org). + + In case you performed a successful bisection, add everyone to the CC the + culprit's commit message mentions in lines starting with "Signed-off-by:". 
+ +When CCing for forwarding your report to the list, consider directly telling the +aforementioned Linux kernel regression tracking bot about your report. To do +that, include a paragraph like this in your mail:: + + #regzbot introduced: v5.13..v5.14-rc1 + +Regzbot will then consider your mail a report for a regression introduced in the +specified version range. In above case Linux v5.13 still worked fine and Linux +v5.14-rc1 was the first version where you encountered the issue. If you +performed a bisection to find the commit that caused the regression, specify the +culprit's commit-id instead:: + + #regzbot introduced: 1f2e3d4c5d + +Placing such a "regzbot command" is in your interest, as it will ensure the +report won't fall through the cracks unnoticed. If you omit this, the Linux +kernel's regressions tracker will take care of telling regzbot about your +regression, as long as you send a copy to the regressions mailing lists. But the +regression tracker is just one human which sometimes has to rest or occasionally +might even enjoy some time away from computers (as crazy as that might sound). +Relying on this person thus will result in an unnecessary delay before the +regressions becomes mentioned `on the list of tracked and unresolved Linux +kernel regressions `_ and the +weekly regression reports sent by regzbot. Such delays can result in Linus +Torvalds being unaware of important regressions when deciding between "continue +development or call this finished and release the final?". + +Are really all regressions fixed? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Nearly all of them are, as long as the change causing the regression (the +"culprit commit") is reliably identified. Some regressions can be fixed without +this, but often it's required. + +Who needs to find the root cause of a regression? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Developers of the affected code area should try to locate the culprit on their +own. 
But for them that's often impossible to do with reasonable effort, as quite +a lot of issues only occur in a particular environment outside the developer's +reach -- for example, a specific hardware platform, firmware, Linux distro, +system's configuration, or application. That's why in the end it's often up to +the reporter to locate the culprit commit; sometimes users might even need to +run additional tests afterwards to pinpoint the exact root cause. Developers +should offer advice and reasonably help where they can, to make this process +relatively easy and achievable for typical users. + +How can I find the culprit? +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Perform a bisection, as roughly outlined in +Documentation/admin-guide/reporting-issues.rst and described in more detail by +Documentation/admin-guide/bug-bisect.rst. It might sound like a lot of work, but +in many cases finds the culprit relatively quickly. If it's hard or +time-consuming to reliably reproduce the issue, consider teaming up with other +affected users to narrow down the search range together. + +Who can I ask for advice when it comes to regressions? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Send a mail to the regressions mailing list (regressions@lists.linux.dev) while +CCing the Linux kernel's regression tracker (regressions@leemhuis.info); if the +issue might better be dealt with in private, feel free to omit the list. + + +Additional details about regressions +------------------------------------ + + +What is the goal of the "no regressions rule"? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Users should feel safe when updating kernel versions and not have to worry +something might break. This is in the interest of the kernel developers to make +updating attractive: they don't want users to stay on stable or longterm Linux +series that are either abandoned or more than one and a half years old. 
That's +in everybody's interest, as `those series might have known bugs, security +issues, or other problematic aspects already fixed in later versions +`_. +Additionally, the kernel developers want to make it simple and appealing for +users to test the latest pre-release or regular release. That's also in +everybody's interest, as it's a lot easier to track down and fix problems, if +they are reported shortly after being introduced. + +Is the "no regressions" rule really adhered in practice? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +It's taken really seriously, as can be seen by many mailing list posts from +Linux creator and lead developer Linus Torvalds, some of which are quoted in +Documentation/process/handling-regressions.rst. + +Exceptions to this rule are extremely rare; in the past developers almost always +turned out to be wrong when they assumed a particular situation was warranting +an exception. + +Who ensures the "no regressions" is actually followed? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The subsystem maintainers should take care of that, which are watched and +supported by the tree maintainers -- e.g. Linus Torvalds for mainline and +Greg Kroah-Hartman et al. for various stable/longterm series. + +All of them are helped by people trying to ensure no regression report falls +through the cracks. One of them is Thorsten Leemhuis, who's currently acting as +the Linux kernel's "regressions tracker"; to facilitate this work he relies on +regzbot, the Linux kernel regression tracking bot. That's why you want to bring +your report on the radar of these people by CCing or forwarding each report to +the regressions mailing list, ideally with a "regzbot command" in your mail to +get it tracked immediately. + +Is it a regression, if the issue can be avoided by updating some software? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Almost always: yes. 
If a developer tells you otherwise, ask the regression +tracker for advice as outlined above. + +Is it a regression, if a newer kernel works slower or consumes more energy? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Yes, but the difference has to be significant. A five percent slow-down in a +micro-benchmark thus is unlikely to qualify as regression, unless it also +influences the results of a broad benchmark by more than one percent. If in +doubt, ask for advice. + +Is it a regression, if an external kernel module breaks when updating Linux? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +No, as the "no regression" rule is about interfaces and services the Linux +kernel provides to the userland. It thus does not cover building or running +externally developed kernel modules, as they run in kernel-space and hook into +the kernel using internal interfaces occasionally changed. + +How are regressions handled that are caused by security fixes? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In extremely rare situations security issues can't be fixed without causing +regressions; those fixes are given way, as they are the lesser evil in the end. +Luckily this middling almost always can be avoided, as key developers for the +affected area and often Linus Torvalds himself try very hard to fix security +issues without causing regressions. + +If you nevertheless face such a case, check the mailing list archives if people +tried their best to avoid the regression. If not, report it; if in doubt, ask +for advice as outlined above. + +What happens if fixing a regression is impossible without causing another? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Sadly these things happen, but luckily not very often; if they occur, expert +developers of the affected code area should look into the issue to find a fix +that avoids regressions or at least their impact. 
If you run into such a +situation, do what was outlined already for regressions caused by security +fixes: check earlier discussions if people already tried their best and ask for +advice if in doubt. + +A quick note while at it: these situations could be avoided, if people would +regularly give mainline pre-releases (say v5.15-rc1 or -rc3) from each +development cycle a test run. This is best explained by imagining a change +integrated between Linux v5.14 and v5.15-rc1 which causes a regression, but at +the same time is a hard requirement for some other improvement applied for +5.15-rc1. All these changes often can simply be reverted and the regression thus +solved, if someone finds and reports it before 5.15 is released. A few days or +weeks later this solution can become impossible, as some software might have +started to rely on aspects introduced by one of the follow-up changes: reverting +all changes would then cause a regression for users of said software and thus is +out of the question. + +Is it a regression, if some feature I relied on was removed months ago? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +It is, but often it's hard to fix such regressions due to the aspects outlined +in the previous section. It hence needs to be dealt with on a case-by-case +basis. This is another reason why it's in everybody's interest to regularly test +mainline pre-releases. + +Does the "no regression" rule apply if I seem to be the only affected person? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +It does, but only for practical usage: the Linux developers want to be free to +remove support for hardware only to be found in attics and museums anymore. + +Note, sometimes regressions can't be avoided to make progress -- and the latter +is needed to prevent Linux from stagnation. 
Hence, if only very few users seem +to be affected by a regression, it for the greater good might be in their and +everyone else's interest to let things pass. Especially if there is an +easy way to circumvent the regression somehow, for example by updating some +software or using a kernel parameter created just for this purpose. + +Does the regression rule apply for code in the staging tree as well? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Not according to the `help text for the configuration option covering all +staging code `_, +which since its early days states:: + + Please note that these drivers are under heavy development, may or + may not work, and may contain userspace interfaces that most likely + will be changed in the near future. + +The staging developers nevertheless often adhere to the "no regressions" rule, +but sometimes bend it to make progress. That's for example why some users had to +deal with (often negligible) regressions when a WiFi driver from the staging +tree was replaced by a totally different one written from scratch. + +Why do later versions have to be "compiled with a similar configuration"? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Because the Linux kernel developers sometimes integrate changes known to cause +regressions, but make them optional and disable them in the kernel's default +configuration. This trick allows progress, as the "no regressions" rule +otherwise would lead to stagnation. + +Consider for example a new security feature blocking access to some kernel +interfaces often abused by malware, which at the same time are required to run a +few rarely used applications. The outlined approach makes both camps happy: +people using these applications can leave the new security feature off, while +everyone else can enable it without running into trouble. + +How to create a configuration similar to the one of an older kernel?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Start your machine with a known-good kernel and configure the newer Linux +version with ``make olddefconfig``. This makes the kernel's build scripts pick +up the configuration file (the ".config" file) from the running kernel as base +for the new one you are about to compile; afterwards they set all new +configuration options to their default value, which should disable new features +that might cause regressions. + +Can I report a regression I found with pre-compiled vanilla kernels? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You need to ensure the newer kernel was compiled with a similar configuration +file as the older one (see above), as those that built them might have enabled +some known-to-be incompatible feature for the newer kernel. If in doubt, report +the matter to the kernel's provider and ask for advice. + + +More about regression tracking with "regzbot" +--------------------------------------------- + +What is regression tracking and why should I care about it? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Rules like "no regressions" need someone to ensure they are followed, otherwise +they are broken either accidentally or on purpose. History has shown this to be +true for Linux kernel development as well. That's why Thorsten Leemhuis, the +Linux Kernel's regression tracker, and some people try to ensure all regressions +are fixed by keeping an eye on them until they are resolved. Neither of them are +paid for this, that's why the work is done on a best effort basis. + +Why and how are Linux kernel regressions tracked using a bot? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Tracking regressions completely manually has proven to be quite hard due to the +distributed and loosely structured nature of Linux kernel development process.
+That's why the Linux kernel's regression tracker developed regzbot to facilitate +the work, with the long term goal to automate regression tracking as much as +possible for everyone involved. + +Regzbot works by watching for replies to reports of tracked regressions. +Additionally, it's looking out for posted or committed patches referencing such +reports with "Link:" tags; replies to such patch postings are tracked as well. +Combined this data provides good insights into the current state of the fixing +process. + +How to see which regressions regzbot tracks currently? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Check out `regzbot's web-interface `_. + +What kind of issues are supposed to be tracked by regzbot? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The bot is meant to track regressions, hence please don't involve regzbot for +regular issues. But it's okay for the Linux kernel's regression tracker if you +involve regzbot to track severe issues, like reports about hangs, corrupted +data, or internal errors (Panic, Oops, BUG(), warning, ...). + +How to change aspects of a tracked regression? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +By using a 'regzbot command' in a direct or indirect reply to the mail with the +report. The easiest way to do that: find the report in your "Sent" folder or the +mailing list archive and reply to it using your mailer's "Reply-all" function. +In that mail, use one of the following commands in a stand-alone paragraph (IOW: +use blank lines to separate one or multiple of these commands from the rest of +the mail's text). 
+ + * Update when the regression started to happen, for example after performing a + bisection:: + + #regzbot introduced: 1f2e3d4c5d + + * Set or update the title:: + + #regzbot title: foo + + * Monitor a discussion or bugzilla.kernel.org ticket where additional aspects of + the issue or a fix are discussed:: + + #regzbot monitor: https://lore.kernel.org/r/30th.anniversary.repost@klaava.Helsinki.FI/ + #regzbot monitor: https://bugzilla.kernel.org/show_bug.cgi?id=123456789 + + * Point to a place with further details of interest, like a mailing list post + or a ticket in a bug tracker that are slightly related, but about a different + topic:: + + #regzbot link: https://bugzilla.kernel.org/show_bug.cgi?id=123456789 + + * Mark a regression as invalid:: + + #regzbot invalid: wasn't a regression, problem has always existed + +Regzbot supports a few other commands primarily used by developers or people +tracking regressions. They and more details about the aforementioned regzbot +commands can be found in the `getting started guide +`_ and +the `reference documentation `_ +for regzbot. + +.. + end-of-content +.. + This text is available under GPL-2.0+ or CC-BY-4.0, as stated at the top + of the file. If you want to distribute this text under CC-BY-4.0 only, + please use "The Linux kernel developers" for author attribution and link + this as source: + https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/reporting-regressions.rst +.. + Note: Only the content of this RST file as found in the Linux kernel sources + is available under CC-BY-4.0, as versions of this text that were processed + (for example by the kernel's build system) might contain content taken from + files which use a more restrictive license.
diff --git a/Documentation/process/handling-regressions.rst b/Documentation/process/handling-regressions.rst new file mode 100644 index 0000000000000..e1102a3207e3f --- /dev/null +++ b/Documentation/process/handling-regressions.rst @@ -0,0 +1,659 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0) +.. See the bottom of this file for additional redistribution information. + +Handling regressions +++++++++++++++++++++ + +*We don't cause regressions* -- this document describes what this "first rule of +Linux kernel development" means in practice for developers. It complements +Documentation/admin-guide/reporting-regressions.rst, which covers the topic from a +user's point of view; if you never read that text, go and at least skim over it +before continuing here. + +The important bits (aka "The TL;DR") +==================================== + +#. Ensure subscribers of the `regression mailing list `_ + (regressions@lists.linux.dev) quickly become aware of any new regression + report: + + * When receiving a mailed report that did not CC the list, bring it into the + loop by immediately sending at least a brief "Reply-all" with the list + CCed. + + * Forward or bounce any reports submitted in bug trackers to the list. + +#. Make the Linux kernel regression tracking bot "regzbot" track the issue (this + is optional, but recommended): + + * For mailed reports, check if the reporter included a line like ``#regzbot + introduced v5.13..v5.14-rc1``. If not, send a reply (with the regressions + list in CC) containing a paragraph like the following, which tells regzbot + when the issue started to happen:: + + #regzbot ^introduced 1f2e3d4c5b6a + + * When forwarding reports from a bug tracker to the regressions list (see + above), include a paragraph like the following:: + + #regzbot introduced: v5.13..v5.14-rc1 + #regzbot from: Some N. Ice Human + #regzbot monitor: http://some.bugtracker.example.com/ticket?id=123456789 + +#. 
When submitting fixes for regressions, add "Link:" tags to the patch + description pointing to all places where the issue was reported, as + mandated by Documentation/process/submitting-patches.rst and + :ref:`Documentation/process/5.Posting.rst `. + + +All the details on Linux kernel regressions relevant for developers +=================================================================== + + +The important basics in more detail +----------------------------------- + + +What to do when receiving regression reports +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Ensure the Linux kernel's regression tracker and other subscribers of the +`regression mailing list `_ +(regressions@lists.linux.dev) become aware of any newly reported regression: + + * When you receive a report by mail that did not CC the list, immediately bring + it into the loop by sending at least a brief "Reply-all" with the list CCed; + try to ensure it gets CCed again in case you reply to a reply that omitted + the list. + + * If a report submitted in a bug tracker hits your Inbox, forward or bounce it + to the list. Consider checking the list archives beforehand, if the reporter + already forwarded the report as instructed by + Documentation/admin-guide/reporting-issues.rst. + +When doing either, consider making the Linux kernel regression tracking bot +"regzbot" immediately start tracking the issue: + + * For mailed reports, check if the reporter included a "regzbot command" like + ``#regzbot introduced 1f2e3d4c5b6a``. If not, send a reply (with the + regressions list in CC) with a paragraph like the following:: + + #regzbot ^introduced: v5.13..v5.14-rc1 + + This tells regzbot the version range in which the issue started to happen; + you can specify a range using commit-ids as well or state a single commit-id + in case the reporter bisected the culprit.
+ + Note the caret (^) before the "introduced": it tells regzbot to treat the + parent mail (the one you reply to) as the initial report for the regression + you want to see tracked; that's important, as regzbot will later look out + for patches with "Link:" tags pointing to the report in the archives on + lore.kernel.org. + + * When forwarding a regression reported to a bug tracker, include a paragraph + with these regzbot commands:: + + #regzbot introduced: 1f2e3d4c5b6a + #regzbot from: Some N. Ice Human + #regzbot monitor: http://some.bugtracker.example.com/ticket?id=123456789 + + Regzbot will then automatically associate patches with the report that + contain "Link:" tags pointing to your mail or the mentioned ticket. + +What's important when fixing regressions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You don't need to do anything special when submitting fixes for regressions, just +remember to do what Documentation/process/submitting-patches.rst, +:ref:`Documentation/process/5.Posting.rst `, and +Documentation/process/stable-kernel-rules.rst already explain in more detail: + + * Point to all places where the issue was reported using "Link:" tags:: + + Link: https://lore.kernel.org/r/30th.anniversary.repost@klaava.Helsinki.FI/ + Link: https://bugzilla.kernel.org/show_bug.cgi?id=1234567890 + + * Add a "Fixes:" tag to specify the commit causing the regression. + + * If the culprit was merged in an earlier development cycle, explicitly mark + the fix for backporting using the ``Cc: stable@vger.kernel.org`` tag. + +All this is expected from you and important when it comes to regressions, as +these tags are of great value for everyone (you included) that might be looking +into the issue weeks, months, or years later. These tags are also crucial for +tools and scripts used by other kernel developers or Linux distributions; one of +these tools is regzbot, which heavily relies on the "Link:" tags to associate +reports for regressions with changes resolving them.
+ + +More aspects regarding regressions developers should be aware of +---------------------------------------------------------------- + + +How to deal with changes where a risk of regression is known +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Evaluate how big the risk of regressions is, for example by performing a code +search in Linux distributions and Git forges. Also consider asking other +developers or projects likely to be affected to evaluate or even test the +proposed change; if problems surface, maybe some solution acceptable for all +can be found. + +If the risk of regressions in the end seems to be relatively small, go ahead +with the change, but let all involved parties know about the risk. Hence, make +sure your patch description makes this aspect obvious. Once the change is +merged, tell the Linux kernel's regression tracker and the regressions mailing +list about the risk, so everyone has the change on the radar in case reports +trickle in. Depending on the risk, you also might want to ask the subsystem +maintainer to mention the issue in his mainline pull request. + +What else is there to know about regressions? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Check out Documentation/admin-guide/reporting-regressions.rst, it covers a lot +of other aspects you might want to be aware of: + + * the purpose of the "no regressions rule" + + * what issues actually qualify as regression + + * who's in charge of finding the root cause of a regression + + * how to handle tricky situations, e.g.
when a regression is caused by a + security fix or when fixing a regression might cause another one + +Whom to ask for advice when it comes to regressions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Send a mail to the regressions mailing list (regressions@lists.linux.dev) while +CCing the Linux kernel's regression tracker (regressions@leemhuis.info); if the +issue might better be dealt with in private, feel free to omit the list. + + +More about regression tracking and regzbot +------------------------------------------ + + +Why the Linux kernel has a regression tracker, and why is regzbot used? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Rules like "no regressions" need someone to ensure they are followed, otherwise +they are broken either accidentally or on purpose. History has shown this to be +true for the Linux kernel as well. That's why Thorsten Leemhuis volunteered to +keep an eye on things as the Linux kernel's regression tracker, who's +occasionally helped by other people. Neither of them are paid to do this, +that's why regression tracking is done on a best effort basis. + +Earlier attempts to manually track regressions have shown it's an exhausting and +frustrating work, which is why they were abandoned after a while. To prevent +this from happening again, Thorsten developed regzbot to facilitate the work, +with the long term goal to automate regression tracking as much as possible for +everyone involved. + +How does regression tracking work with regzbot? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The bot watches for replies to reports of tracked regressions. Additionally, +it's looking out for posted or committed patches referencing such reports +with "Link:" tags; replies to such patch postings are tracked as well. +Combined this data provides good insights into the current state of the fixing +process. 
+ +Regzbot tries to do its job with as little overhead as possible for both +reporters and developers. In fact, only reporters are burdened with an extra +duty: they need to tell regzbot about the regression report using the ``#regzbot +introduced`` command outlined above; if they don't do that, someone else can +take care of that using ``#regzbot ^introduced``. + +For developers there normally is no extra work involved, they just need to make +sure to do something that was expected long before regzbot came to light: add +"Link:" tags to the patch description pointing to all reports about the issue +fixed. + +Do I have to use regzbot? +~~~~~~~~~~~~~~~~~~~~~~~~~ + +It's in the interest of everyone if you do, as kernel maintainers like Linus +Torvalds partly rely on regzbot's tracking in their work -- for example when +deciding to release a new version or extend the development phase. For this they +need to be aware of all unfixed regressions; to do that, Linus is known to look +into the weekly reports sent by regzbot. + +Do I have to tell regzbot about every regression I stumble upon? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Ideally yes: we are all humans and easily forget problems when something more +important unexpectedly comes up -- for example a bigger problem in the Linux +kernel or something in real life that's keeping us away from keyboards for a +while. Hence, it's best to tell regzbot about every regression, except when you +immediately write a fix and commit it to a tree regularly merged to the affected +kernel series. + +How to see which regressions regzbot tracks currently? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Check `regzbot's web-interface `_ +for the latest info; alternatively, `search for the latest regression report +`_, +which regzbot normally sends out once a week on Sunday evening (UTC), which is a +few hours before Linus usually publishes new (pre-)releases. + +What places is regzbot monitoring?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Regzbot is watching the most important Linux mailing lists as well as the git +repositories of linux-next, mainline, and stable/longterm. + +What kind of issues are supposed to be tracked by regzbot? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The bot is meant to track regressions, hence please don't involve regzbot for +regular issues. But it's okay for the Linux kernel's regression tracker if you +use regzbot to track severe issues, like reports about hangs, corrupted data, +or internal errors (Panic, Oops, BUG(), warning, ...). + +Can I add regressions found by CI systems to regzbot's tracking? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Feel free to do so, if the particular regression likely has impact on practical +use cases and thus might be noticed by users; hence, please don't involve +regzbot for theoretical regressions unlikely to show themselves in real world +usage. + +How to interact with regzbot? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +By using a 'regzbot command' in a direct or indirect reply to the mail with the +regression report. These commands need to be in their own paragraph (IOW: they +need to be separated from the rest of the mail using blank lines). + +One such command is ``#regzbot introduced ``, which makes +regzbot consider your mail as a regression report added to the tracking, as +already described above; ``#regzbot ^introduced `` is another +such command, which makes regzbot consider the parent mail as a report for a +regression which it starts to track. + +Once one of those two commands has been utilized, other regzbot commands can be +used in direct or indirect replies to the report.
You can write them below one +of the `introduced` commands or in replies to the mail that used one of them +or itself is a reply to that mail: + + * Set or update the title:: + + #regzbot title: foo + + * Monitor a discussion or bugzilla.kernel.org ticket where additional aspects of + the issue or a fix are discussed -- for example the posting of a patch fixing + the regression:: + + #regzbot monitor: https://lore.kernel.org/all/30th.anniversary.repost@klaava.Helsinki.FI/ + + Monitoring only works for lore.kernel.org and bugzilla.kernel.org; regzbot + will consider all messages in that thread or ticket as related to the fixing + process. + + * Point to a place with further details of interest, like a mailing list post + or a ticket in a bug tracker that are slightly related, but about a different + topic:: + + #regzbot link: https://bugzilla.kernel.org/show_bug.cgi?id=123456789 + + * Mark a regression as fixed by a commit that is heading upstream or already + landed:: + + #regzbot fixed-by: 1f2e3d4c5d + + * Mark a regression as a duplicate of another one already tracked by regzbot:: + + #regzbot dup-of: https://lore.kernel.org/all/30th.anniversary.repost@klaava.Helsinki.FI/ + + * Mark a regression as invalid:: + + #regzbot invalid: wasn't a regression, problem has always existed + +Is there more to tell about regzbot and its commands? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +More detailed and up-to-date information about the Linux +kernel's regression tracking bot can be found on its +`project page `_, which among others +contains a `getting started guide `_ +and `reference documentation `_ +which both cover more details than the above section. + +Quotes from Linus about regression +---------------------------------- + +Find below a few real life examples of how Linus Torvalds expects regressions to +be handled: + + * From `2017-10-26 (1/2) + `_:: + + If you break existing user space setups THAT IS A REGRESSION.
+ + It's not ok to say "but we'll fix the user space setup". + + Really. NOT OK. + + [...] + + The first rule is: + + - we don't cause regressions + + and the corollary is that when regressions *do* occur, we admit to + them and fix them, instead of blaming user space. + + The fact that you have apparently been denying the regression now for + three weeks means that I will revert, and I will stop pulling apparmor + requests until the people involved understand how kernel development + is done. + + * From `2017-10-26 (2/2) + `_:: + + People should basically always feel like they can update their kernel + and simply not have to worry about it. + + I refuse to introduce "you can only update the kernel if you also + update that other program" kind of limitations. If the kernel used to + work for you, the rule is that it continues to work for you. + + There have been exceptions, but they are few and far between, and they + generally have some major and fundamental reasons for having happened, + that were basically entirely unavoidable, and people _tried_hard_ to + avoid them. Maybe we can't practically support the hardware any more + after it is decades old and nobody uses it with modern kernels any + more. Maybe there's a serious security issue with how we did things, + and people actually depended on that fundamentally broken model. Maybe + there was some fundamental other breakage that just _had_ to have a + flag day for very core and fundamental reasons. + + And notice that this is very much about *breaking* peoples environments. + + Behavioral changes happen, and maybe we don't even support some + feature any more. There's a number of fields in /proc//stat that + are printed out as zeroes, simply because they don't even *exist* in + the kernel any more, or because showing them was a mistake (typically + an information leak). But the numbers got replaced by zeroes, so that + the code that used to parse the fields still works. 
The user might not + see everything they used to see, and so behavior is clearly different, + but things still _work_, even if they might no longer show sensitive + (or no longer relevant) information. + + But if something actually breaks, then the change must get fixed or + reverted. And it gets fixed in the *kernel*. Not by saying "well, fix + your user space then". It was a kernel change that exposed the + problem, it needs to be the kernel that corrects for it, because we + have a "upgrade in place" model. We don't have a "upgrade with new + user space". + + And I seriously will refuse to take code from people who do not + understand and honor this very simple rule. + + This rule is also not going to change. + + And yes, I realize that the kernel is "special" in this respect. I'm + proud of it. + + I have seen, and can point to, lots of projects that go "We need to + break that use case in order to make progress" or "you relied on + undocumented behavior, it sucks to be you" or "there's a better way to + do what you want to do, and you have to change to that new better + way", and I simply don't think that's acceptable outside of very early + alpha releases that have experimental users that know what they signed + up for. The kernel hasn't been in that situation for the last two + decades. + + We do API breakage _inside_ the kernel all the time. We will fix + internal problems by saying "you now need to do XYZ", but then it's + about internal kernel API's, and the people who do that then also + obviously have to fix up all the in-kernel users of that API. Nobody + can say "I now broke the API you used, and now _you_ need to fix it + up". Whoever broke something gets to fix it too. + + And we simply do not break user space. + + * From `2020-05-21 + `_:: + + The rules about regressions have never been about any kind of + documented behavior, or where the code lives. + + The rules about regressions are always about "breaks user workflow". 
+ + Users are literally the _only_ thing that matters. + + No amount of "you shouldn't have used this" or "that behavior was + undefined, it's your own fault your app broke" or "that used to work + simply because of a kernel bug" is at all relevant. + + Now, reality is never entirely black-and-white. So we've had things + like "serious security issue" etc that just forces us to make changes + that may break user space. But even then the rule is that we don't + really have other options that would allow things to continue. + + And obviously, if users take years to even notice that something + broke, or if we have sane ways to work around the breakage that + doesn't make for too much trouble for users (ie "ok, there are a + handful of users, and they can use a kernel command line to work + around it" kind of things) we've also been a bit less strict. + + But no, "that was documented to be broken" (whether it's because the + code was in staging or because the man-page said something else) is + irrelevant. If staging code is so useful that people end up using it, + that means that it's basically regular kernel code with a flag saying + "please clean this up". + + The other side of the coin is that people who talk about "API + stability" are entirely wrong. API's don't matter either. You can make + any changes to an API you like - as long as nobody notices. + + Again, the regression rule is not about documentation, not about + API's, and not about the phase of the moon. + + It's entirely about "we caused problems for user space that used to work". + + * From `2017-11-05 + `_:: + + And our regression rule has never been "behavior doesn't change". + That would mean that we could never make any changes at all. + + For example, we do things like add new error handling etc all the + time, which we then sometimes even add tests for in our kselftest + directory. + + So clearly behavior changes all the time and we don't consider that a + regression per se. 
+ + The rule for a regression for the kernel is that some real user + workflow breaks. Not some test. Not a "look, I used to be able to do + X, now I can't". + + * From `2018-08-03 + `_:: + + YOU ARE MISSING THE #1 KERNEL RULE. + + We do not regress, and we do not regress exactly because your are 100% wrong. + + And the reason you state for your opinion is in fact exactly *WHY* you + are wrong. + + Your "good reasons" are pure and utter garbage. + + The whole point of "we do not regress" is so that people can upgrade + the kernel and never have to worry about it. + + > Kernel had a bug which has been fixed + + That is *ENTIRELY* immaterial. + + Guys, whether something was buggy or not DOES NOT MATTER. + + Why? + + Bugs happen. That's a fact of life. Arguing that "we had to break + something because we were fixing a bug" is completely insane. We fix + tens of bugs every single day, thinking that "fixing a bug" means that + we can break something is simply NOT TRUE. + + So bugs simply aren't even relevant to the discussion. They happen, + they get found, they get fixed, and it has nothing to do with "we + break users". + + Because the only thing that matters IS THE USER. + + How hard is that to understand? + + Anybody who uses "but it was buggy" as an argument is entirely missing + the point. As far as the USER was concerned, it wasn't buggy - it + worked for him/her. + + Maybe it worked *because* the user had taken the bug into account, + maybe it worked because the user didn't notice - again, it doesn't + matter. It worked for the user. + + Breaking a user workflow for a "bug" is absolutely the WORST reason + for breakage you can imagine. + + It's basically saying "I took something that worked, and I broke it, + but now it's better". Do you not see how f*cking insane that statement + is? + + And without users, your program is not a program, it's a pointless + piece of code that you might as well throw away. + + Seriously. 
This is *why* the #1 rule for kernel development is "we + don't break users". Because "I fixed a bug" is absolutely NOT AN + ARGUMENT if that bug fix broke a user setup. You actually introduced a + MUCH BIGGER bug by "fixing" something that the user clearly didn't + even care about. + + And dammit, we upgrade the kernel ALL THE TIME without upgrading any + other programs at all. It is absolutely required, because flag-days + and dependencies are horribly bad. + + And it is also required simply because I as a kernel developer do not + upgrade random other tools that I don't even care about as I develop + the kernel, and I want any of my users to feel safe doing the same + time. + + So no. Your rule is COMPLETELY wrong. If you cannot upgrade a kernel + without upgrading some other random binary, then we have a problem. + + * From `2021-06-05 + `_:: + + THERE ARE NO VALID ARGUMENTS FOR REGRESSIONS. + + Honestly, security people need to understand that "not working" is not + a success case of security. It's a failure case. + + Yes, "not working" may be secure. But security in that case is *pointless*. + + * From `2011-05-06 (1/3) + `_:: + + Binary compatibility is more important. + + And if binaries don't use the interface to parse the format (or just + parse it wrongly - see the fairly recent example of adding uuid's to + /proc/self/mountinfo), then it's a regression. + + And regressions get reverted, unless there are security issues or + similar that makes us go "Oh Gods, we really have to break things". + + I don't understand why this simple logic is so hard for some kernel + developers to understand. Reality matters. Your personal wishes matter + NOT AT ALL. + + If you made an interface that can be used without parsing the + interface description, then we're stuck with the interface. Theory + simply doesn't matter. + + You could help fix the tools, and try to avoid the compatibility + issues that way. There aren't that many of them. 
+ + From `2011-05-06 (2/3) + `_:: + + it's clearly NOT an internal tracepoint. By definition. It's being + used by powertop. + + From `2011-05-06 (3/3) + `_:: + + We have programs that use that ABI and thus it's a regression if they break. + + * From `2012-07-06 `_:: + + > Now this got me wondering if Debian _unstable_ actually qualifies as a + > standard distro userspace. + + Oh, if the kernel breaks some standard user space, that counts. Tons + of people run Debian unstable + + * From `2019-09-15 + `_:: + + One _particularly_ last-minute revert is the top-most commit (ignoring + the version change itself) done just before the release, and while + it's very annoying, it's perhaps also instructive. + + What's instructive about it is that I reverted a commit that wasn't + actually buggy. In fact, it was doing exactly what it set out to do, + and did it very well. In fact it did it _so_ well that the much + improved IO patterns it caused then ended up revealing a user-visible + regression due to a real bug in a completely unrelated area. + + The actual details of that regression are not the reason I point that + revert out as instructive, though. It's more that it's an instructive + example of what counts as a regression, and what the whole "no + regressions" kernel rule means. The reverted commit didn't change any + API's, and it didn't introduce any new bugs. But it ended up exposing + another problem, and as such caused a kernel upgrade to fail for a + user. So it got reverted. + + The point here being that we revert based on user-reported _behavior_, + not based on some "it changes the ABI" or "it caused a bug" concept. + The problem was really pre-existing, and it just didn't happen to + trigger before. The better IO patterns introduced by the change just + happened to expose an old bug, and people had grown to depend on the + previously benign behavior of that old issue. 
+ + And never fear, we'll re-introduce the fix that improved on the IO + patterns once we've decided just how to handle the fact that we had a + bad interaction with an interface that people had then just happened + to rely on incidental behavior for before. It's just that we'll have + to hash through how to do that (there are no less than three different + patches by three different developers being discussed, and there might + be more coming...). In the meantime, I reverted the thing that exposed + the problem to users for this release, even if I hope it will be + re-introduced (perhaps even backported as a stable patch) once we have + consensus about the issue it exposed. + + Take-away from the whole thing: it's not about whether you change the + kernel-userspace ABI, or fix a bug, or about whether the old code + "should never have worked in the first place". It's about whether + something breaks existing users' workflow. + + Anyway, that was my little aside on the whole regression thing. Since + it's that "first rule of kernel programming", I felt it is perhaps + worth just bringing it up every once in a while + +.. + end-of-content +.. + This text is available under GPL-2.0+ or CC-BY-4.0, as stated at the top + of the file. If you want to distribute this text under CC-BY-4.0 only, + please use "The Linux kernel developers" for author attribution and link + this as source: + https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/process/handling-regressions.rst +.. + Note: Only the content of this RST file as found in the Linux kernel sources + is available under CC-BY-4.0, as versions of this text that were processed + (for example by the kernel's build system) might contain content taken from + files which use a more restrictive license. 
diff --git a/Documentation/process/index.rst b/Documentation/process/index.rst index 9f1b88492bb33..428e39074f616 100644 --- a/Documentation/process/index.rst +++ b/Documentation/process/index.rst @@ -25,6 +25,7 @@ Below are the essential guides that every developer should read. code-of-conduct-interpretation development-process submitting-patches + handling-regressions programming-language coding-style maintainer-handbooks diff --git a/MAINTAINERS b/MAINTAINERS index ea3e6c9143848..6c62f7e0dc9d9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10438,6 +10438,8 @@ KERNEL REGRESSIONS M: Thorsten Leemhuis L: regressions@lists.linux.dev S: Supported +F: Documentation/admin-guide/reporting-regressions.rst +F: Documentation/process/handling-regressions.rst KERNEL SELFTEST FRAMEWORK M: Shuah Khan -- GitLab From d2b40ba2cce207ecea8a740f71e113f03cc75fd5 Mon Sep 17 00:00:00 2001 From: Thorsten Leemhuis Date: Wed, 16 Feb 2022 07:51:34 +0100 Subject: [PATCH 0642/1586] docs: *-regressions.rst: explain how quickly issues should be handled Add a section with a few rules of thumb about how quickly developers should address regressions to Documentation/process/handling-regressions.rst; additionally, add a short paragraph about this to the companion document Documentation/admin-guide/reporting-regressions.rst as well. The rules of thumb were written after studying the quotes from Linus found in handling-regressions.rst and especially influenced by statements like "Users are literally the _only_ thing that matters" and "without users, your program is not a program, it's a pointless piece of code that you might as well throw away". The author interpreted those in perspective to how the various Linux kernel series are maintained currently and what those practices might mean for users running into a regression on a small or big kernel update. 
That for example led to the paragraph starting with "Aim to get fixes for regressions mainlined within one week after identifying the culprit, if the regression was introduced in a stable/longterm release or the devel cycle for the latest mainline release". Some might see this as a pretty high bar, but on the other hand something like that is needed to not leave users out in the cold for too long -- which can quickly happen when updating to the latest stable series, as the previous one is normally stamped "End of Life" about three or four weeks after a new mainline release. This makes a lot of users switch during this timeframe. Any of them thus risk running into regressions not promptly fixed; even worse, once the previous stable series is EOLed for real, users that face a regression might be left with only three options: (1) continue running an outdated and thus potentially insecure kernel version from an abandoned stable series (2) run the kernel with the regression (3) downgrade to an earlier longterm series still supported This is better avoided, as (1) puts users and their data in danger, (2) will only be possible if it's a minor regression that doesn't interfere with booting or serious usage, and (3) might be a regression itself or impossible on the particular machine, as the users might require drivers or features only introduced after the latest longterm series branched off. In the end this led to the aforementioned "Aim to fix regression within one week" part. It's also the reason for the "Try to resolve any regressions introduced in the current development cycle before its end.". 
Signed-off-by: Thorsten Leemhuis CC: Linus Torvalds Acked-by: Greg Kroah-Hartman Reviewed-by: Lukas Bulwahn Link: https://lore.kernel.org/r/a7b717b52c0d54cdec9b6daf56ed6669feddee2c.1644994117.git.linux@leemhuis.info Signed-off-by: Jonathan Corbet --- .../admin-guide/reporting-regressions.rst | 12 +++ .../process/handling-regressions.rst | 87 +++++++++++++++++++ 2 files changed, 99 insertions(+) diff --git a/Documentation/admin-guide/reporting-regressions.rst b/Documentation/admin-guide/reporting-regressions.rst index 6fbd24ceb3bfe..d8adccdae23f5 100644 --- a/Documentation/admin-guide/reporting-regressions.rst +++ b/Documentation/admin-guide/reporting-regressions.rst @@ -214,6 +214,18 @@ your report on the radar of these people by CCing or forwarding each report to the regressions mailing list, ideally with a "regzbot command" in your mail to get it tracked immediately. +How quickly are regressions normally fixed? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Developers should fix any reported regression as quickly as possible, to provide +affected users with a solution in a timely manner and prevent more users from +running into the issue; nevertheless developers need to take enough time and +care to ensure regression fixes do not cause additional damage. + +The answer thus depends on various factors like the impact of a regression, its +age, or the Linux series in which it occurs. In the end though, most regressions +should be fixed within two weeks. + Is it a regression, if the issue can be avoided by updating some software? 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/Documentation/process/handling-regressions.rst b/Documentation/process/handling-regressions.rst index e1102a3207e3f..abb741b1aeee7 100644 --- a/Documentation/process/handling-regressions.rst +++ b/Documentation/process/handling-regressions.rst @@ -45,6 +45,10 @@ The important bits (aka "The TL;DR") mandated by Documentation/process/submitting-patches.rst and :ref:`Documentation/process/5.Posting.rst `. +#. Try to fix regressions quickly once the culprit has been identified; fixes + for most regressions should be merged within two weeks, but some need to be + resolved within two or three days. + All the details on Linux kernel regressions relevant for developers =================================================================== @@ -125,6 +129,89 @@ tools and scripts used by other kernel developers or Linux distributions; one of these tools is regzbot, which heavily relies on the "Link:" tags to associate reports for regression with changes resolving them. +Prioritize work on fixing regressions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You should fix any reported regression as quickly as possible, to provide +affected users with a solution in a timely manner and prevent more users from +running into the issue; nevertheless developers need to take enough time and +care to ensure regression fixes do not cause additional damage. + +In the end though, developers should give their best to prevent users from +running into situations where a regression leaves them only three options: "run +a kernel with a regression that seriously impacts usage", "continue running an +outdated and thus potentially insecure kernel version for more than two weeks +after a regression's culprit was identified", and "downgrade to a still +supported kernel series that lack required features". + +How to realize this depends a lot on the situation. 
Here are a few rules of +thumb for you, in order of importance: + + * Prioritize work on handling regression reports and fixing regression over all + other Linux kernel work, unless the latter concerns acute security issues or + bugs causing data loss or damage. + + * Always consider reverting the culprit commits and reapplying them later + together with necessary fixes, as this might be the least dangerous and + quickest way to fix a regression. + + * Developers should handle regressions in all supported kernel series, but are + free to delegate the work to the stable team, if the issue probably at no + point in time occurred with mainline. + + * Try to resolve any regressions introduced in the current development before + its end. If you fear a fix might be too risky to apply only days before a new + mainline release, let Linus decide: submit the fix separately to him as soon + as possible with the explanation of the situation. He then can make a call + and postpone the release if necessary, for example if multiple such changes + show up in his inbox. + + * Address regressions in stable, longterm, or proper mainline releases with + more urgency than regressions in mainline pre-releases. That changes after + the release of the fifth pre-release, aka "-rc5": mainline then becomes as + important, to ensure all the improvements and fixes are ideally tested + together for at least one week before Linus releases a new mainline version. + + * Fix regressions within two or three days, if they are critical for some + reason -- for example, if the issue is likely to affect many users of the + kernel series in question on all or certain architectures. Note, this + includes mainline, as issues like compile errors otherwise might prevent many + testers or continuous integration systems from testing the series. 
+ + * Aim to fix regressions within one week after the culprit was identified, if + the issue was introduced in either: + + * a recent stable/longterm release + + * the development cycle of the latest proper mainline release + + In the latter case (say Linux v5.14), try to address regressions even + quicker, if the stable series for the predecessor (v5.13) will be abandoned + soon or already was stamped "End-of-Life" (EOL) -- this usually happens about + three to four weeks after a new mainline release. + + * Try to fix all other regressions within two weeks after the culprit was + found. Two or three additional weeks are acceptable for performance + regressions and other issues which are annoying, but don't prevent anyone + from running Linux (unless it's an issue in the current development cycle, + as those should ideally be addressed before the release). A few weeks in + total are acceptable if a regression can only be fixed with a risky change + and at the same time is affecting only a few users; as much time is + also okay if the regression is already present in the second newest longterm + kernel series. + +Note: The aforementioned time frames for resolving regressions are meant to +include getting the fix tested, reviewed, and merged into mainline, ideally with +the fix being in linux-next at least briefly. This leads to delays you need to +account for. + +Subsystem maintainers are expected to assist in reaching those periods by doing +timely reviews and quick handling of accepted patches. They thus might have to +send git-pull requests earlier or more often than usual; depending on the fix, +it might even be acceptable to skip testing in linux-next. Especially fixes for +regressions in stable and longterm kernels need to be handled quickly, as fixes +need to be merged in mainline before they can be backported to older series. 
+ More aspects regarding regressions developers should be aware of ---------------------------------------------------------------- -- GitLab From 247097e2bbff4201b85eee8de4f31b4065877f67 Mon Sep 17 00:00:00 2001 From: Thorsten Leemhuis Date: Wed, 16 Feb 2022 07:51:35 +0100 Subject: [PATCH 0643/1586] docs: reporting-issues.rst: link new document about regressions Make Documentation/admin-guide/reporting-issues.rst point to the newly created document about regressions (Documentation/admin-guide/reporting-regressions.rst). This allows shortening a few explanations the new document describes better and in more detail. While at it move the copyright hint to the end of the file and remove quotes around links to other places in the documentation. Both issues came up during the review of the new documents about regressions. Signed-off-by: Thorsten Leemhuis Link: https://lore.kernel.org/r/f20114eaddc356a8c79dd62812a6c7f4ca5d87b9.1644994117.git.linux@leemhuis.info Signed-off-by: Jonathan Corbet --- .../admin-guide/reporting-issues.rst | 73 +++++++++---------- 1 file changed, 36 insertions(+), 37 deletions(-) diff --git a/Documentation/admin-guide/reporting-issues.rst b/Documentation/admin-guide/reporting-issues.rst index d7ac13f789cce..ec62151fe6720 100644 --- a/Documentation/admin-guide/reporting-issues.rst +++ b/Documentation/admin-guide/reporting-issues.rst @@ -1,14 +1,5 @@ .. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0) -.. - If you want to distribute this text under CC-BY-4.0 only, please use 'The - Linux kernel developers' for author attribution and link this as source: - https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/reporting-issues.rst -.. 
- Note: Only the content of this RST file as found in the Linux kernel sources - is available under CC-BY-4.0, as versions of this text that were processed - (for example by the kernel's build system) might contain content taken from - files which use a more restrictive license. - +.. See the bottom of this file for additional redistribution information. Reporting issues ++++++++++++++++ @@ -395,22 +386,16 @@ fixed as soon as possible, hence there are 'issues of high priority' that get handled slightly differently in the reporting process. Three type of cases qualify: regressions, security issues, and really severe problems. -You deal with a 'regression' if something that worked with an older version of -the Linux kernel does not work with a newer one or somehow works worse with it. -It thus is a regression when a WiFi driver that did a fine job with Linux 5.7 -somehow misbehaves with 5.8 or doesn't work at all. It's also a regression if -an application shows erratic behavior with a newer kernel, which might happen -due to incompatible changes in the interface between the kernel and the -userland (like procfs and sysfs). Significantly reduced performance or -increased power consumption also qualify as regression. But keep in mind: the -new kernel needs to be built with a configuration that is similar to the one -from the old kernel (see below how to achieve that). That's because the kernel -developers sometimes can not avoid incompatibilities when implementing new -features; but to avoid regressions such features have to be enabled explicitly -during build time configuration. +You deal with a regression if some application or practical use case running +fine with one Linux kernel works worse or not at all with a newer version +compiled using a similar configuration. The document +Documentation/admin-guide/reporting-regressions.rst explains this in more +detail. 
It also provides a good deal of other information about regressions you +might want to be aware of; it for example explains how to add your issue to the +list of tracked regressions, to ensure it won't fall through the cracks. What qualifies as security issue is left to your judgment. Consider reading -'Documentation/admin-guide/security-bugs.rst' before proceeding, as it +Documentation/admin-guide/security-bugs.rst before proceeding, as it provides additional details how to best handle security issues. An issue is a 'really severe problem' when something totally unacceptably bad @@ -517,7 +502,7 @@ line starting with 'CPU:'. It should end with 'Not tainted' if the kernel was not tainted when it noticed the problem; it was tainted if you see 'Tainted:' followed by a few spaces and some letters. -If your kernel is tainted, study 'Documentation/admin-guide/tainted-kernels.rst' +If your kernel is tainted, study Documentation/admin-guide/tainted-kernels.rst to find out why. Try to eliminate the reason. Often it's caused by one these three things: @@ -1043,7 +1028,7 @@ down the culprit, as maintainers often won't have the time or setup at hand to reproduce it themselves. To find the change there is a process called 'bisection' which the document -'Documentation/admin-guide/bug-bisect.rst' describes in detail. That process +Documentation/admin-guide/bug-bisect.rst describes in detail. That process will often require you to build about ten to twenty kernel images, trying to reproduce the issue with each of them before building the next. Yes, that takes some time, but don't worry, it works a lot quicker than most people assume. @@ -1073,10 +1058,11 @@ When dealing with regressions make sure the issue you face is really caused by the kernel and not by something else, as outlined above already. In the whole process keep in mind: an issue only qualifies as regression if the -older and the newer kernel got built with a similar configuration. 
The best way -to archive this: copy the configuration file (``.config``) from the old working -kernel freshly to each newer kernel version you try. Afterwards run ``make -olddefconfig`` to adjust it for the needs of the new version. +older and the newer kernel got built with a similar configuration. This can be +achieved by using ``make olddefconfig``, as explained in more detail by +Documentation/admin-guide/reporting-regressions.rst; that document also +provides a good deal of other information about regressions you might want to be +aware of. Write and send the report @@ -1283,7 +1269,7 @@ them when sending the report by mail. If you filed it in a bug tracker, forward the report's text to these addresses; but on top of it put a small note where you mention that you filed it with a link to the ticket. -See 'Documentation/admin-guide/security-bugs.rst' for more information. +See Documentation/admin-guide/security-bugs.rst for more information. Duties after the report went out @@ -1571,7 +1557,7 @@ Once your report is out your might get asked to do a proper one, as it allows to pinpoint the exact change that causes the issue (which then can easily get reverted to fix the issue quickly). Hence consider to do a proper bisection right away if time permits. See the section 'Special care for regressions' and -the document 'Documentation/admin-guide/bug-bisect.rst' for details how to +the document Documentation/admin-guide/bug-bisect.rst for details how to perform one. In case of a successful bisection add the author of the culprit to the recipients; also CC everyone in the signed-off-by chain, which you find at the end of its commit message. @@ -1594,7 +1580,7 @@ Some fixes are too complex Even small and seemingly obvious code-changes sometimes introduce new and totally unexpected problems. 
The maintainers of the stable and longterm kernels are very aware of that and thus only apply changes to these kernels that are -within rules outlined in 'Documentation/process/stable-kernel-rules.rst'. +within rules outlined in Documentation/process/stable-kernel-rules.rst. Complex or risky changes for example do not qualify and thus only get applied to mainline. Other fixes are easy to get backported to the newest stable and @@ -1756,10 +1742,23 @@ art will lay some groundwork to improve the situation over time. .. - This text is maintained by Thorsten Leemhuis . If you - spot a typo or small mistake, feel free to let him know directly and he'll - fix it. You are free to do the same in a mostly informal way if you want - to contribute changes to the text, but for copyright reasons please CC + end-of-content +.. + This document is maintained by Thorsten Leemhuis . If + you spot a typo or small mistake, feel free to let him know directly and + he'll fix it. You are free to do the same in a mostly informal way if you + want to contribute changes to the text, but for copyright reasons please CC linux-doc@vger.kernel.org and "sign-off" your contribution as Documentation/process/submitting-patches.rst outlines in the section "Sign your work - the Developer's Certificate of Origin". +.. + This text is available under GPL-2.0+ or CC-BY-4.0, as stated at the top + of the file. If you want to distribute this text under CC-BY-4.0 only, + please use "The Linux kernel developers" for author attribution and link + this as source: + https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/reporting-issues.rst +.. + Note: Only the content of this RST file as found in the Linux kernel sources + is available under CC-BY-4.0, as versions of this text that were processed + (for example by the kernel's build system) might contain content taken from + files which use a more restrictive license. 
-- GitLab From ac982578e7d340dc4f4fd243f4a4b24787d28c3f Mon Sep 17 00:00:00 2001 From: Krishna Yarlagadda Date: Tue, 22 Feb 2022 23:26:07 +0530 Subject: [PATCH 0644/1586] spi: tegra210-quad: use device_reset method Use device_reset api to replace duplicate code in driver to call reset_control_get api with reset handle. Signed-off-by: Krishna Yarlagadda Link: https://lore.kernel.org/r/20220222175611.58051-2-kyarlagadda@nvidia.com Signed-off-by: Mark Brown --- drivers/spi/spi-tegra210-quad.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c index cb00ac2fc7d8e..a353f2a9abd44 100644 --- a/drivers/spi/spi-tegra210-quad.c +++ b/drivers/spi/spi-tegra210-quad.c @@ -137,7 +137,6 @@ struct tegra_qspi { spinlock_t lock; struct clk *clk; - struct reset_control *rst; void __iomem *base; phys_addr_t phys; unsigned int irq; @@ -948,9 +947,8 @@ static void tegra_qspi_handle_error(struct tegra_qspi *tqspi) dev_err(tqspi->dev, "error in transfer, fifo status 0x%08x\n", tqspi->status_reg); tegra_qspi_dump_regs(tqspi); tegra_qspi_flush_fifos(tqspi, true); - reset_control_assert(tqspi->rst); - udelay(2); - reset_control_deassert(tqspi->rst); + if (device_reset(tqspi->dev) < 0) + dev_warn_once(tqspi->dev, "device reset failed\n"); } static void tegra_qspi_transfer_end(struct spi_device *spi) @@ -1251,13 +1249,6 @@ static int tegra_qspi_probe(struct platform_device *pdev) return ret; } - tqspi->rst = devm_reset_control_get_exclusive(&pdev->dev, NULL); - if (IS_ERR(tqspi->rst)) { - ret = PTR_ERR(tqspi->rst); - dev_err(&pdev->dev, "failed to get reset control: %d\n", ret); - return ret; - } - tqspi->max_buf_size = QSPI_FIFO_DEPTH << 2; tqspi->dma_buf_size = DEFAULT_QSPI_DMA_BUF_LEN; @@ -1279,9 +1270,8 @@ static int tegra_qspi_probe(struct platform_device *pdev) goto exit_pm_disable; } - reset_control_assert(tqspi->rst); - udelay(2); - reset_control_deassert(tqspi->rst); + if 
(device_reset(tqspi->dev) < 0) + dev_warn_once(tqspi->dev, "device reset failed\n"); tqspi->def_command1_reg = QSPI_M_S | QSPI_CS_SW_HW | QSPI_CS_SW_VAL; tegra_qspi_writel(tqspi, tqspi->def_command1_reg, QSPI_COMMAND1); -- GitLab From de2f678b11bdcbabb6d804c543f9a3325c0e83bf Mon Sep 17 00:00:00 2001 From: Krishna Yarlagadda Date: Tue, 22 Feb 2022 23:26:08 +0530 Subject: [PATCH 0645/1586] spi: Add Tegra234 QUAD SPI compatible Add compatible string for Tegra234 for Tegra QUAD SPI Signed-off-by: Krishna Yarlagadda Link: https://lore.kernel.org/r/20220222175611.58051-3-kyarlagadda@nvidia.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml b/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml index 35a8045b2c70d..6efea8970e623 100644 --- a/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml +++ b/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml @@ -19,6 +19,7 @@ properties: - nvidia,tegra210-qspi - nvidia,tegra186-qspi - nvidia,tegra194-qspi + - nvidia,tegra234-qspi reg: maxItems: 1 -- GitLab From ea23f0e148b82e5bcbc6c814926f53133552f0f3 Mon Sep 17 00:00:00 2001 From: Krishna Yarlagadda Date: Tue, 22 Feb 2022 23:26:09 +0530 Subject: [PATCH 0646/1586] spi: tegra210-quad: add new chips to compatible Add support for Tegra234 and soc data to select capabilities. 
Signed-off-by: Krishna Yarlagadda Link: https://lore.kernel.org/r/20220222175611.58051-4-kyarlagadda@nvidia.com Signed-off-by: Mark Brown --- drivers/spi/spi-tegra210-quad.c | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c index a353f2a9abd44..3725ee5331ae0 100644 --- a/drivers/spi/spi-tegra210-quad.c +++ b/drivers/spi/spi-tegra210-quad.c @@ -125,6 +125,10 @@ #define QSPI_DMA_TIMEOUT (msecs_to_jiffies(1000)) #define DEFAULT_QSPI_DMA_BUF_LEN (64 * 1024) +struct tegra_qspi_soc_data { + bool has_dma; +}; + struct tegra_qspi_client_data { int tx_clk_tap_delay; int rx_clk_tap_delay; @@ -184,6 +188,7 @@ struct tegra_qspi { u32 *tx_dma_buf; dma_addr_t tx_dma_phys; struct dma_async_tx_descriptor *tx_dma_desc; + const struct tegra_qspi_soc_data *soc_data; }; static inline u32 tegra_qspi_readl(struct tegra_qspi *tqspi, unsigned long offset) @@ -1191,10 +1196,32 @@ static irqreturn_t tegra_qspi_isr_thread(int irq, void *context_data) return handle_dma_based_xfer(tqspi); } +static struct tegra_qspi_soc_data tegra210_qspi_soc_data = { + .has_dma = true, +}; + +static struct tegra_qspi_soc_data tegra186_qspi_soc_data = { + .has_dma = true, +}; + +static struct tegra_qspi_soc_data tegra234_qspi_soc_data = { + .has_dma = false, +}; + static const struct of_device_id tegra_qspi_of_match[] = { - { .compatible = "nvidia,tegra210-qspi", }, - { .compatible = "nvidia,tegra186-qspi", }, - { .compatible = "nvidia,tegra194-qspi", }, + { + .compatible = "nvidia,tegra210-qspi", + .data = &tegra210_qspi_soc_data, + }, { + .compatible = "nvidia,tegra186-qspi", + .data = &tegra186_qspi_soc_data, + }, { + .compatible = "nvidia,tegra194-qspi", + .data = &tegra186_qspi_soc_data, + }, { + .compatible = "nvidia,tegra234-qspi", + .data = &tegra234_qspi_soc_data, + }, {} }; -- GitLab From 3a4f7ef4bed5bdc77a1ac8132f9f0650bbcb3eae Mon Sep 17 00:00:00 2001 From: Liam Howlett Date: Fri, 18 
Feb 2022 02:37:04 +0000 Subject: [PATCH 0647/1586] arm64: Change elfcore for_each_mte_vma() to use VMA iterator Rework for_each_mte_vma() to use a VMA iterator instead of an explicit linked-list. This will allow easy integration with the maple tree work which removes the VMA list altogether. Signed-off-by: Liam R. Howlett Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20220218023650.672072-1-Liam.Howlett@oracle.com [will: Folded in fix from Catalin] Link: https://lore.kernel.org/r/YhUcywqIhmHvX6dG@arm.com Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/kernel/elfcore.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c index 3455ee4acc04c..3ed39c61a510c 100644 --- a/arch/arm64/kernel/elfcore.c +++ b/arch/arm64/kernel/elfcore.c @@ -8,9 +8,16 @@ #include #include -#define for_each_mte_vma(tsk, vma) \ +#ifndef VMA_ITERATOR +#define VMA_ITERATOR(name, mm, addr) \ + struct mm_struct *name = mm +#define for_each_vma(vmi, vma) \ + for (vma = vmi->mmap; vma; vma = vma->vm_next) +#endif + +#define for_each_mte_vma(vmi, vma) \ if (system_supports_mte()) \ - for (vma = tsk->mm->mmap; vma; vma = vma->vm_next) \ + for_each_vma(vmi, vma) \ if (vma->vm_flags & VM_MTE) static unsigned long mte_vma_tag_dump_size(struct vm_area_struct *vma) @@ -65,8 +72,9 @@ Elf_Half elf_core_extra_phdrs(void) { struct vm_area_struct *vma; int vma_count = 0; + VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(current, vma) + for_each_mte_vma(vmi, vma) vma_count++; return vma_count; @@ -75,8 +83,9 @@ Elf_Half elf_core_extra_phdrs(void) int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) { struct vm_area_struct *vma; + VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(current, vma) { + for_each_mte_vma(vmi, vma) { struct elf_phdr phdr; phdr.p_type = PT_ARM_MEMTAG_MTE; @@ -100,8 +109,9 @@ size_t elf_core_extra_data_size(void) { struct 
vm_area_struct *vma; size_t data_size = 0; + VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(current, vma) + for_each_mte_vma(vmi, vma) data_size += mte_vma_tag_dump_size(vma); return data_size; @@ -110,8 +120,9 @@ size_t elf_core_extra_data_size(void) int elf_core_write_extra_data(struct coredump_params *cprm) { struct vm_area_struct *vma; + VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(current, vma) { + for_each_mte_vma(vmi, vma) { if (vma->vm_flags & VM_DONTDUMP) continue; -- GitLab From da844beb6d9f97cb6fe4b443f9610a9fcc534f9d Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Thu, 24 Feb 2022 12:49:50 +0000 Subject: [PATCH 0648/1586] arm64: cpufeature: Account min_field_value when checking secondaries for PAuth In case both boot_val and sec_val have a value below min_field_value, we would wrongly report that address authentication is supported. It is not a big issue because we enable address authentication based on boot cpu (and the check there is correct). Signed-off-by: Vladimir Murzin Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20220224124952.119612-2-vladimir.murzin@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index e5f23dab1c8df..3271770b60d66 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1829,7 +1829,7 @@ static bool has_address_auth_cpucap(const struct arm64_cpu_capabilities *entry, /* Now check for the secondary CPUs with SCOPE_LOCAL_CPU scope */ sec_val = cpuid_feature_extract_field(__read_sysreg_by_encoding(entry->sys_reg), entry->field_pos, entry->sign); - return sec_val == boot_val; + return (sec_val >= entry->min_field_value) && (sec_val == boot_val); } static bool has_address_auth_metacap(const struct arm64_cpu_capabilities *entry, -- GitLab From be3256a086afb4048baf18e6a35a3a81482aa2fa Mon Sep 17 00:00:00 2001 From: Vladimir Murzin 
Date: Thu, 24 Feb 2022 12:49:51 +0000 Subject: [PATCH 0649/1586] arm64: cpufeature: Mark existing PAuth architected algorithm as QARMA5 In preparation for supporting the PAuth QARMA3 architected algorithm, mark the existing one as QARMA5, so we can distinguish between the two. Signed-off-by: Vladimir Murzin Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20220224124952.119612-3-vladimir.murzin@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 20 ++++++++++++-------- arch/arm64/tools/cpucaps | 4 ++-- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 3271770b60d66..1b955bea28569 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1835,15 +1835,19 @@ static bool has_address_auth_cpucap(const struct arm64_cpu_capabilities *entry, static bool has_address_auth_metacap(const struct arm64_cpu_capabilities *entry, int scope) { - return has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH], scope) || - has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope); + bool api = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope); + bool apa = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA5], scope); + + return apa || api; } static bool has_generic_auth(const struct arm64_cpu_capabilities *entry, int __unused) { - return __system_matches_cap(ARM64_HAS_GENERIC_AUTH_ARCH) || - __system_matches_cap(ARM64_HAS_GENERIC_AUTH_IMP_DEF); + bool gpi = __system_matches_cap(ARM64_HAS_GENERIC_AUTH_IMP_DEF); + bool gpa = __system_matches_cap(ARM64_HAS_GENERIC_AUTH_ARCH_QARMA5); + + return gpa || gpi; } #endif /* CONFIG_ARM64_PTR_AUTH */ @@ -2230,8 +2234,8 @@ static const struct arm64_cpu_capabilities arm64_features[] = { }, #ifdef CONFIG_ARM64_PTR_AUTH { - .desc = "Address authentication (architected algorithm)", - .capability = ARM64_HAS_ADDRESS_AUTH_ARCH, + .desc = "Address
authentication (architected QARMA5 algorithm)", + .capability = ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA5, .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, @@ -2255,8 +2259,8 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_address_auth_metacap, }, { - .desc = "Generic authentication (architected algorithm)", - .capability = ARM64_HAS_GENERIC_AUTH_ARCH, + .desc = "Generic authentication (architected QARMA5 algorithm)", + .capability = ARM64_HAS_GENERIC_AUTH_ARCH_QARMA5, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 9c65b1e25a965..4c39247581f64 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -7,7 +7,7 @@ BTI HAS_32BIT_EL0_DO_NOT_USE HAS_32BIT_EL1 HAS_ADDRESS_AUTH -HAS_ADDRESS_AUTH_ARCH +HAS_ADDRESS_AUTH_ARCH_QARMA5 HAS_ADDRESS_AUTH_IMP_DEF HAS_AMU_EXTN HAS_ARMv8_4_TTL @@ -21,7 +21,7 @@ HAS_E0PD HAS_ECV HAS_EPAN HAS_GENERIC_AUTH -HAS_GENERIC_AUTH_ARCH +HAS_GENERIC_AUTH_ARCH_QARMA5 HAS_GENERIC_AUTH_IMP_DEF HAS_IRQ_PRIO_MASKING HAS_LDAPR -- GitLab From def8c222f054d18aac1fd065a50b9db5feaefa9d Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Thu, 24 Feb 2022 12:49:52 +0000 Subject: [PATCH 0650/1586] arm64: Add support of PAuth QARMA3 architected algorithm QARMA3 is a relaxed version of the QARMA5 algorithm which is expected to reduce the latency of calculation while still delivering a suitable level of security. Support for QARMA3 can be discovered via ID_AA64ISAR2_EL1 APA3, bits [15:12] Indicates whether the QARMA3 algorithm is implemented in the PE for address authentication in AArch64 state. GPA3, bits [11:8] Indicates whether the QARMA3 algorithm is implemented in the PE for generic code authentication in AArch64 state.
Signed-off-by: Vladimir Murzin Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20220224124952.119612-4-vladimir.murzin@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/asm_pointer_auth.h | 3 ++ arch/arm64/include/asm/cpufeature.h | 1 + arch/arm64/include/asm/kvm_hyp.h | 1 + arch/arm64/include/asm/sysreg.h | 12 ++++++ arch/arm64/kernel/cpufeature.c | 41 ++++++++++++++++++- arch/arm64/kernel/idreg-override.c | 16 +++++++- arch/arm64/kvm/arm.c | 1 + .../arm64/kvm/hyp/include/nvhe/fixed_config.h | 5 +++ arch/arm64/kvm/hyp/nvhe/sys_regs.c | 14 +++++++ arch/arm64/kvm/sys_regs.c | 5 +++ arch/arm64/tools/cpucaps | 2 + 11 files changed, 97 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/asm_pointer_auth.h b/arch/arm64/include/asm/asm_pointer_auth.h index f1bba5fc61c49..ead62f7dd2694 100644 --- a/arch/arm64/include/asm/asm_pointer_auth.h +++ b/arch/arm64/include/asm/asm_pointer_auth.h @@ -60,6 +60,9 @@ alternative_else_nop_endif .macro __ptrauth_keys_init_cpu tsk, tmp1, tmp2, tmp3 mrs \tmp1, id_aa64isar1_el1 ubfx \tmp1, \tmp1, #ID_AA64ISAR1_APA_SHIFT, #8 + mrs_s \tmp2, SYS_ID_AA64ISAR2_EL1 + ubfx \tmp2, \tmp2, #ID_AA64ISAR2_APA3_SHIFT, #4 + orr \tmp1, \tmp1, \tmp2 cbz \tmp1, .Lno_addr_auth\@ mov_q \tmp1, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \ SCTLR_ELx_ENDA | SCTLR_ELx_ENDB) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index ef6be92b1921a..fe7137ff61903 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -854,6 +854,7 @@ static inline unsigned int get_vmid_bits(u64 mmfr1) extern struct arm64_ftr_override id_aa64mmfr1_override; extern struct arm64_ftr_override id_aa64pfr1_override; extern struct arm64_ftr_override id_aa64isar1_override; +extern struct arm64_ftr_override id_aa64isar2_override; u32 get_kvm_ipa_limit(void); void dump_cpu_features(void); diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 
462882f356c77..aa7fa2a08f060 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -118,6 +118,7 @@ extern u64 kvm_nvhe_sym(id_aa64pfr0_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64pfr1_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64isar0_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64isar1_el1_sys_val); +extern u64 kvm_nvhe_sym(id_aa64isar2_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val); diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 898bee0004aee..cbe416462b886 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -773,6 +773,8 @@ #define ID_AA64ISAR1_GPI_IMP_DEF 0x1 /* id_aa64isar2 */ +#define ID_AA64ISAR2_APA3_SHIFT 12 +#define ID_AA64ISAR2_GPA3_SHIFT 8 #define ID_AA64ISAR2_RPRES_SHIFT 4 #define ID_AA64ISAR2_WFXT_SHIFT 0 @@ -786,6 +788,16 @@ #define ID_AA64ISAR2_WFXT_NI 0x0 #define ID_AA64ISAR2_WFXT_SUPPORTED 0x2 +#define ID_AA64ISAR2_APA3_NI 0x0 +#define ID_AA64ISAR2_APA3_ARCHITECTED 0x1 +#define ID_AA64ISAR2_APA3_ARCH_EPAC 0x2 +#define ID_AA64ISAR2_APA3_ARCH_EPAC2 0x3 +#define ID_AA64ISAR2_APA3_ARCH_EPAC2_FPAC 0x4 +#define ID_AA64ISAR2_APA3_ARCH_EPAC2_FPAC_CMB 0x5 + +#define ID_AA64ISAR2_GPA3_NI 0x0 +#define ID_AA64ISAR2_GPA3_ARCHITECTED 0x1 + /* id_aa64pfr0 */ #define ID_AA64PFR0_CSV3_SHIFT 60 #define ID_AA64PFR0_CSV2_SHIFT 56 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 1b955bea28569..f6ecad8fc1c1c 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -226,6 +226,10 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { }; static const struct arm64_ftr_bits ftr_id_aa64isar2[] = { + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), + FTR_STRICT, FTR_EXACT, ID_AA64ISAR2_APA3_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), + 
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_GPA3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_RPRES_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -596,6 +600,7 @@ static const struct arm64_ftr_bits ftr_raz[] = { struct arm64_ftr_override __ro_after_init id_aa64mmfr1_override; struct arm64_ftr_override __ro_after_init id_aa64pfr1_override; struct arm64_ftr_override __ro_after_init id_aa64isar1_override; +struct arm64_ftr_override __ro_after_init id_aa64isar2_override; static const struct __ftr_reg_entry { u32 sys_id; @@ -644,6 +649,8 @@ static const struct __ftr_reg_entry { ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1, &id_aa64isar1_override), ARM64_FTR_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2), + ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2, + &id_aa64isar2_override), /* Op1 = 0, CRn = 0, CRm = 7 */ ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0), @@ -1837,8 +1844,9 @@ static bool has_address_auth_metacap(const struct arm64_cpu_capabilities *entry, { bool api = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope); bool apa = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA5], scope); + bool apa3 = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA3], scope); - return apa || api; + return apa || apa3 || api; } static bool has_generic_auth(const struct arm64_cpu_capabilities *entry, @@ -1846,8 +1854,9 @@ static bool has_generic_auth(const struct arm64_cpu_capabilities *entry, { bool gpi = __system_matches_cap(ARM64_HAS_GENERIC_AUTH_IMP_DEF); bool gpa = __system_matches_cap(ARM64_HAS_GENERIC_AUTH_ARCH_QARMA5); + bool gpa3 = __system_matches_cap(ARM64_HAS_GENERIC_AUTH_ARCH_QARMA3); - return gpa || gpi; + return gpa || gpa3 || gpi; } #endif /* CONFIG_ARM64_PTR_AUTH */ @@ -2243,6 +2252,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .min_field_value = ID_AA64ISAR1_APA_ARCHITECTED, .matches = 
has_address_auth_cpucap, }, + { + .desc = "Address authentication (architected QARMA3 algorithm)", + .capability = ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA3, + .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, + .sys_reg = SYS_ID_AA64ISAR2_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64ISAR2_APA3_SHIFT, + .min_field_value = ID_AA64ISAR2_APA3_ARCHITECTED, + .matches = has_address_auth_cpucap, + }, { .desc = "Address authentication (IMP DEF algorithm)", .capability = ARM64_HAS_ADDRESS_AUTH_IMP_DEF, @@ -2268,6 +2287,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .min_field_value = ID_AA64ISAR1_GPA_ARCHITECTED, .matches = has_cpuid_feature, }, + { + .desc = "Generic authentication (architected QARMA3 algorithm)", + .capability = ARM64_HAS_GENERIC_AUTH_ARCH_QARMA3, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .sys_reg = SYS_ID_AA64ISAR2_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64ISAR2_GPA3_SHIFT, + .min_field_value = ID_AA64ISAR2_GPA3_ARCHITECTED, + .matches = has_cpuid_feature, + }, { .desc = "Generic authentication (IMP DEF algorithm)", .capability = ARM64_HAS_GENERIC_AUTH_IMP_DEF, @@ -2415,6 +2444,10 @@ static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = { HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_APA_SHIFT, FTR_UNSIGNED, ID_AA64ISAR1_APA_ARCHITECTED) }, + { + HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_APA3_SHIFT, + FTR_UNSIGNED, ID_AA64ISAR2_APA3_ARCHITECTED) + }, { HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_API_SHIFT, FTR_UNSIGNED, ID_AA64ISAR1_API_IMP_DEF) @@ -2427,6 +2460,10 @@ static const struct arm64_cpu_capabilities ptr_auth_hwcap_gen_matches[] = { HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_GPA_SHIFT, FTR_UNSIGNED, ID_AA64ISAR1_GPA_ARCHITECTED) }, + { + HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_GPA3_SHIFT, + FTR_UNSIGNED, ID_AA64ISAR2_GPA3_ARCHITECTED) + }, { HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_GPI_SHIFT, FTR_UNSIGNED, ID_AA64ISAR1_GPI_IMP_DEF) diff --git 
a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index d8e606fe3c21b..8a2ceb5916863 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -17,7 +17,7 @@ #define FTR_DESC_NAME_LEN 20 #define FTR_DESC_FIELD_LEN 10 #define FTR_ALIAS_NAME_LEN 30 -#define FTR_ALIAS_OPTION_LEN 80 +#define FTR_ALIAS_OPTION_LEN 116 struct ftr_set_desc { char name[FTR_DESC_NAME_LEN]; @@ -71,6 +71,16 @@ static const struct ftr_set_desc isar1 __initconst = { }, }; +static const struct ftr_set_desc isar2 __initconst = { + .name = "id_aa64isar2", + .override = &id_aa64isar2_override, + .fields = { + { "gpa3", ID_AA64ISAR2_GPA3_SHIFT }, + { "apa3", ID_AA64ISAR2_APA3_SHIFT }, + {} + }, +}; + extern struct arm64_ftr_override kaslr_feature_override; static const struct ftr_set_desc kaslr __initconst = { @@ -88,6 +98,7 @@ static const struct ftr_set_desc * const regs[] __initconst = { &mmfr1, &pfr1, &isar1, + &isar2, &kaslr, }; @@ -100,7 +111,8 @@ static const struct { { "arm64.nobti", "id_aa64pfr1.bt=0" }, { "arm64.nopauth", "id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 " - "id_aa64isar1.api=0 id_aa64isar1.apa=0" }, + "id_aa64isar1.api=0 id_aa64isar1.apa=0 " + "id_aa64isar2.gpa3=0 id_aa64isar2.apa3=0" }, { "arm64.nomte", "id_aa64pfr1.mte=0" }, { "nokaslr", "kaslr.disabled=1" }, }; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index ecc5958e27fe2..f3bfc0ddeb0b9 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1870,6 +1870,7 @@ static int kvm_hyp_init_protection(u32 hyp_va_bits) kvm_nvhe_sym(id_aa64pfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1); kvm_nvhe_sym(id_aa64isar0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR0_EL1); kvm_nvhe_sym(id_aa64isar1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR1_EL1); + kvm_nvhe_sym(id_aa64isar2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1); kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); 
kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1); diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h index eea1f6a537230..5ad626527d411 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h +++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h @@ -192,6 +192,11 @@ ARM64_FEATURE_MASK(ID_AA64ISAR1_I8MM) \ ) +#define PVM_ID_AA64ISAR2_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) \ + ) + u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id); bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code); bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code); diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index 792cf6e6ac920..33f5181af330d 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -22,6 +22,7 @@ u64 id_aa64pfr0_el1_sys_val; u64 id_aa64pfr1_el1_sys_val; u64 id_aa64isar0_el1_sys_val; u64 id_aa64isar1_el1_sys_val; +u64 id_aa64isar2_el1_sys_val; u64 id_aa64mmfr0_el1_sys_val; u64 id_aa64mmfr1_el1_sys_val; u64 id_aa64mmfr2_el1_sys_val; @@ -183,6 +184,17 @@ static u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu) return id_aa64isar1_el1_sys_val & allow_mask; } +static u64 get_pvm_id_aa64isar2(const struct kvm_vcpu *vcpu) +{ + u64 allow_mask = PVM_ID_AA64ISAR2_ALLOW; + + if (!vcpu_has_ptrauth(vcpu)) + allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) | + ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3)); + + return id_aa64isar2_el1_sys_val & allow_mask; +} + static u64 get_pvm_id_aa64mmfr0(const struct kvm_vcpu *vcpu) { u64 set_mask; @@ -225,6 +237,8 @@ u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id) return get_pvm_id_aa64isar0(vcpu); case SYS_ID_AA64ISAR1_EL1: return get_pvm_id_aa64isar1(vcpu); + case SYS_ID_AA64ISAR2_EL1: + return 
get_pvm_id_aa64isar2(vcpu); case SYS_ID_AA64MMFR0_EL1: return get_pvm_id_aa64mmfr0(vcpu); case SYS_ID_AA64MMFR1_EL1: diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 4dc2fba316fff..baa65292bbc2a 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1097,6 +1097,11 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI)); break; + case SYS_ID_AA64ISAR2_EL1: + if (!vcpu_has_ptrauth(vcpu)) + val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) | + ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3)); + break; case SYS_ID_AA64DFR0_EL1: /* Limit debug to ARMv8.0 */ val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER); diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 4c39247581f64..162bc2443217b 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -7,6 +7,7 @@ BTI HAS_32BIT_EL0_DO_NOT_USE HAS_32BIT_EL1 HAS_ADDRESS_AUTH +HAS_ADDRESS_AUTH_ARCH_QARMA3 HAS_ADDRESS_AUTH_ARCH_QARMA5 HAS_ADDRESS_AUTH_IMP_DEF HAS_AMU_EXTN @@ -21,6 +22,7 @@ HAS_E0PD HAS_ECV HAS_EPAN HAS_GENERIC_AUTH +HAS_GENERIC_AUTH_ARCH_QARMA3 HAS_GENERIC_AUTH_ARCH_QARMA5 HAS_GENERIC_AUTH_IMP_DEF HAS_IRQ_PRIO_MASKING -- GitLab From 83854c231262d2ad43c4fb32414ba25304f925d8 Mon Sep 17 00:00:00 2001 From: Li-hao Kuo Date: Fri, 25 Feb 2022 14:31:53 +0800 Subject: [PATCH 0651/1586] spi: dt-bindings: remove unused required property fix issue /builds/robherring/linux-dt/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.example.dt.yaml: spi@9C002D80: 'clocks-names' is a required property From schema: /builds/robherring/linux-dt/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml delete unused required(clock-name) Fixes: 3b8ab4da34 ("spi: Fix test error for sp7021") Reported-by: Rob Herring Signed-off-by: Li-hao Kuo Link: https://lore.kernel.org/r/097bbc8b703b17e8fb3e3f6f6d2f97fe668bd5c5.1645770648.git.lhjeff911@gmail.com Signed-off-by: Mark Brown --- 
Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml b/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml index 298eac28c40ff..3a58cf0f1ec86 100644 --- a/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml +++ b/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml @@ -50,7 +50,6 @@ required: - interrupts - interrupt-names - clocks - - clocks-names - resets - pinctrl-names - pinctrl-0 -- GitLab From 032e6c33790dc51836a40ef702de4c9e0941145f Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Thu, 24 Feb 2022 16:47:39 +0000 Subject: [PATCH 0652/1586] arm64: cpufeature: Remove cpu_has_fwb() check cpu_has_fwb() is supposed to warn the user if the following architectural requirement is not met: LoUU, bits [29:27] - Level of Unification Uniprocessor for the cache hierarchy. Note When FEAT_S2FWB is implemented, the architecture requires that this field is zero so that no levels of data cache need to be cleaned in order to manage coherency with instruction fetches. LoUIS, bits [23:21] - Level of Unification Inner Shareable for the cache hierarchy. Note When FEAT_S2FWB is implemented, the architecture requires that this field is zero so that no levels of data cache need to be cleaned in order to manage coherency with instruction fetches. It is not really clear what the user has to do if the assertion fires. Having assertions about the CPU design like this inspires even more assertions to be added, and the kernel definitely is not the right place for that, so let's remove cpu_has_fwb() altogether.
Signed-off-by: Vladimir Murzin Link: https://lore.kernel.org/r/20220224164739.119168-1-vladimir.murzin@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index e5f23dab1c8df..6d1da359f8041 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1775,14 +1775,6 @@ static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused) write_sysreg(read_sysreg(tpidr_el1), tpidr_el2); } -static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused) -{ - u64 val = read_sysreg_s(SYS_CLIDR_EL1); - - /* Check that CLIDR_EL1.LOU{U,IS} are both 0 */ - WARN_ON(CLIDR_LOUU(val) || CLIDR_LOUIS(val)); -} - #ifdef CONFIG_ARM64_PAN static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) { @@ -2144,7 +2136,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = ID_AA64MMFR2_FWB_SHIFT, .min_field_value = 1, .matches = has_cpuid_feature, - .cpu_enable = cpu_has_fwb, }, { .desc = "ARMv8.4 Translation Table Level", -- GitLab From 4013e26670c590944abdab56c4fa797527b74325 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 18 Feb 2022 00:12:09 -0800 Subject: [PATCH 0653/1586] arm64: module: remove (NOLOAD) from linker script On ELF, (NOLOAD) sets the section type to SHT_NOBITS[1]. It is conceptually inappropriate for .plt and .text.* sections which are always SHT_PROGBITS. In GNU ld, if PLT entries are needed, .plt will be SHT_PROGBITS anyway and (NOLOAD) will be essentially ignored. In ld.lld, since https://reviews.llvm.org/D118840 ("[ELF] Support (TYPE=) to customize the output section type"), ld.lld will report a `section type mismatch` error. Just remove (NOLOAD) to fix the error. 
[1] https://lld.llvm.org/ELF/linker_script.html As of today, "The section should be marked as not loadable" on https://sourceware.org/binutils/docs/ld/Output-Section-Type.html is outdated for ELF. Tested-by: Nathan Chancellor Reported-by: Nathan Chancellor Signed-off-by: Fangrui Song Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20220218081209.354383-1-maskray@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/module.lds.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/module.lds.h b/arch/arm64/include/asm/module.lds.h index a11ccadd47d29..094701ec5500b 100644 --- a/arch/arm64/include/asm/module.lds.h +++ b/arch/arm64/include/asm/module.lds.h @@ -1,8 +1,8 @@ SECTIONS { #ifdef CONFIG_ARM64_MODULE_PLTS - .plt 0 (NOLOAD) : { BYTE(0) } - .init.plt 0 (NOLOAD) : { BYTE(0) } - .text.ftrace_trampoline 0 (NOLOAD) : { BYTE(0) } + .plt 0 : { BYTE(0) } + .init.plt 0 : { BYTE(0) } + .text.ftrace_trampoline 0 : { BYTE(0) } #endif #ifdef CONFIG_KASAN_SW_TAGS -- GitLab From 879358fc670dbc8dc3b0e3e4975ff39e38847707 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 7 Feb 2022 15:20:30 +0000 Subject: [PATCH 0654/1586] arm64: Define CPACR_EL1_FPEN similarly to other floating point controls The base floating point, SVE and SME all have enable controls for EL0 and EL1 in CPACR_EL1 which have a similar layout and function. Currently the basic floating point enable FPEN is defined differently to the SVE control, specified as a single define in kvm_arm.h rather than in sysreg.h. Move the define to sysreg.h and provide separate EL0 and EL1 control bits so code managing the different floating point enables can look consistent. 
Signed-off-by: Mark Brown Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20220207152109.197566-2-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/kvm_arm.h | 1 - arch/arm64/include/asm/sysreg.h | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 01d47c5886dc4..eec790842fe23 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -355,7 +355,6 @@ ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \ ECN(BKPT32), ECN(VECTOR32), ECN(BRK64) -#define CPACR_EL1_FPEN (3 << 20) #define CPACR_EL1_TTA (1 << 28) #define CPACR_EL1_DEFAULT (CPACR_EL1_FPEN | CPACR_EL1_ZEN_EL1EN) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 898bee0004aee..1da4c43d597d0 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1097,6 +1097,10 @@ #define ZCR_ELx_LEN_SIZE 9 #define ZCR_ELx_LEN_MASK 0x1ff +#define CPACR_EL1_FPEN_EL1EN (BIT(20)) /* enable EL1 access */ +#define CPACR_EL1_FPEN_EL0EN (BIT(21)) /* enable EL0 access, if EL1EN set */ +#define CPACR_EL1_FPEN (CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN) + #define CPACR_EL1_ZEN_EL1EN (BIT(16)) /* enable EL1 access */ #define CPACR_EL1_ZEN_EL0EN (BIT(17)) /* enable EL0 access, if EL1EN set */ #define CPACR_EL1_ZEN (CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN) -- GitLab From 3bb72d86d80eb9296d43f9e807b6f9ff58049552 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 7 Feb 2022 15:20:31 +0000 Subject: [PATCH 0655/1586] arm64: Always use individual bits in CPACR floating point enables CPACR_EL1 has several bitfields for controlling traps for floating point features to EL1, each of which has a separate bits for EL0 and EL1. Marc Zyngier noted that we are not consistent in our use of defines to manipulate these, sometimes using a define covering the whole field and sometimes using defines for the individual bits. 
Make this consistent by expanding the whole field defines where they are used (currently only in the KVM code) and deleting them so that no further uses can be introduced. Suggested-by: Marc Zyngier Signed-off-by: Mark Brown Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20220207152109.197566-3-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/kvm_arm.h | 3 ++- arch/arm64/include/asm/sysreg.h | 2 -- arch/arm64/kvm/hyp/include/hyp/switch.h | 4 ++-- arch/arm64/kvm/hyp/vhe/switch.c | 6 +++--- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index eec790842fe23..1767ded838880 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -356,6 +356,7 @@ ECN(BKPT32), ECN(VECTOR32), ECN(BRK64) #define CPACR_EL1_TTA (1 << 28) -#define CPACR_EL1_DEFAULT (CPACR_EL1_FPEN | CPACR_EL1_ZEN_EL1EN) +#define CPACR_EL1_DEFAULT (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |\ + CPACR_EL1_ZEN_EL1EN) #endif /* __ARM64_KVM_ARM_H__ */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 1da4c43d597d0..e66dd9ebc3374 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1099,11 +1099,9 @@ #define CPACR_EL1_FPEN_EL1EN (BIT(20)) /* enable EL1 access */ #define CPACR_EL1_FPEN_EL0EN (BIT(21)) /* enable EL0 access, if EL1EN set */ -#define CPACR_EL1_FPEN (CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN) #define CPACR_EL1_ZEN_EL1EN (BIT(16)) /* enable EL1 access */ #define CPACR_EL1_ZEN_EL0EN (BIT(17)) /* enable EL0 access, if EL1EN set */ -#define CPACR_EL1_ZEN (CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN) /* TCR EL1 Bit Definitions */ #define SYS_TCR_EL1_TCMA1 (BIT(58)) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 701cfb964905d..6379a1e3e6e51 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ 
-174,9 +174,9 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) /* Valid trap. Switch the context: */ if (has_vhe()) { - reg = CPACR_EL1_FPEN; + reg = CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN; if (sve_guest) - reg |= CPACR_EL1_ZEN; + reg |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN; sysreg_clear_set(cpacr_el1, 0, reg); } else { diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 11d053fdd604b..619353b06e38f 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -38,7 +38,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu) val = read_sysreg(cpacr_el1); val |= CPACR_EL1_TTA; - val &= ~CPACR_EL1_ZEN; + val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN); /* * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to @@ -53,9 +53,9 @@ static void __activate_traps(struct kvm_vcpu *vcpu) if (update_fp_enabled(vcpu)) { if (vcpu_has_sve(vcpu)) - val |= CPACR_EL1_ZEN; + val |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN; } else { - val &= ~CPACR_EL1_FPEN; + val &= ~(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN); __activate_traps_fpsimd32(vcpu); } -- GitLab From 0a2eec83c2c23cf609e781732b338a9a4f18e00c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 7 Feb 2022 15:20:32 +0000 Subject: [PATCH 0656/1586] arm64: cpufeature: Always specify and use a field width for capabilities Since all the fields in the main ID registers are 4 bits wide we have up until now not bothered specifying the width in the code. Since we now wish to use this mechanism to enumerate features from the floating point feature registers which do not follow this pattern add a width to the table. This means updating all the existing table entries but makes it less likely that we run into issues in future due to implicitly assuming a 4 bit width. 
Signed-off-by: Mark Brown Cc: Suzuki K Poulose Reviewed-by: Suzuki K Poulose Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220207152109.197566-4-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 1 + arch/arm64/kernel/cpufeature.c | 167 +++++++++++++++++----------- 2 files changed, 102 insertions(+), 66 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index ef6be92b1921a..2728abd9cae4b 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -356,6 +356,7 @@ struct arm64_cpu_capabilities { struct { /* Feature register checking */ u32 sys_reg; u8 field_pos; + u8 field_width; u8 min_field_value; u8 hwcap_type; bool sign; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index e5f23dab1c8df..64a748c2b351f 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1307,7 +1307,9 @@ u64 __read_sysreg_by_encoding(u32 sys_id) static bool feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry) { - int val = cpuid_feature_extract_field(reg, entry->field_pos, entry->sign); + int val = cpuid_feature_extract_field_width(reg, entry->field_pos, + entry->field_width, + entry->sign); return val >= entry->min_field_value; } @@ -1955,6 +1957,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64MMFR0_EL1, .field_pos = ID_AA64MMFR0_ECV_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 1, }, @@ -1966,6 +1969,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64MMFR1_EL1, .field_pos = ID_AA64MMFR1_PAN_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 1, .cpu_enable = cpu_enable_pan, @@ -1979,6 +1983,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = 
SYS_ID_AA64MMFR1_EL1, .field_pos = ID_AA64MMFR1_PAN_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 3, }, @@ -1991,6 +1996,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR0_EL1, .field_pos = ID_AA64ISAR0_ATOMICS_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 2, }, @@ -2015,6 +2021,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_EL0_SHIFT, + .field_width = 4, .min_field_value = ID_AA64PFR0_ELx_32BIT_64BIT, }, #ifdef CONFIG_KVM @@ -2026,6 +2033,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_EL1_SHIFT, + .field_width = 4, .min_field_value = ID_AA64PFR0_ELx_32BIT_64BIT, }, { @@ -2046,6 +2054,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { */ .sys_reg = SYS_ID_AA64PFR0_EL1, .field_pos = ID_AA64PFR0_CSV3_SHIFT, + .field_width = 4, .min_field_value = 1, .matches = unmap_kernel_at_el0, .cpu_enable = kpti_install_ng_mappings, @@ -2065,6 +2074,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR1_EL1, .field_pos = ID_AA64ISAR1_DPB_SHIFT, + .field_width = 4, .min_field_value = 1, }, { @@ -2075,6 +2085,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64ISAR1_DPB_SHIFT, + .field_width = 4, .min_field_value = 2, }, #endif @@ -2086,6 +2097,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_SVE_SHIFT, + .field_width = 4, .min_field_value = ID_AA64PFR0_SVE, .matches = has_cpuid_feature, .cpu_enable = sve_kernel_enable, @@ -2100,6 +2112,7 @@ static const struct arm64_cpu_capabilities 
arm64_features[] = { .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_RAS_SHIFT, + .field_width = 4, .min_field_value = ID_AA64PFR0_RAS_V1, .cpu_enable = cpu_clear_disr, }, @@ -2118,6 +2131,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_AMU_SHIFT, + .field_width = 4, .min_field_value = ID_AA64PFR0_AMU, .cpu_enable = cpu_amu_enable, }, @@ -2142,6 +2156,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64MMFR2_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64MMFR2_FWB_SHIFT, + .field_width = 4, .min_field_value = 1, .matches = has_cpuid_feature, .cpu_enable = cpu_has_fwb, @@ -2153,6 +2168,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64MMFR2_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64MMFR2_TTL_SHIFT, + .field_width = 4, .min_field_value = 1, .matches = has_cpuid_feature, }, @@ -2163,6 +2179,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR0_EL1, .field_pos = ID_AA64ISAR0_TLB_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = ID_AA64ISAR0_TLB_RANGE, }, @@ -2181,6 +2198,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64MMFR1_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64MMFR1_HADBS_SHIFT, + .field_width = 4, .min_field_value = 2, .matches = has_hw_dbm, .cpu_enable = cpu_enable_hw_dbm, @@ -2193,6 +2211,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR0_EL1, .field_pos = ID_AA64ISAR0_CRC32_SHIFT, + .field_width = 4, .min_field_value = 1, }, { @@ -2202,6 +2221,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64PFR1_EL1, .field_pos = ID_AA64PFR1_SSBS_SHIFT, + .field_width = 4, .sign = 
FTR_UNSIGNED, .min_field_value = ID_AA64PFR1_SSBS_PSTATE_ONLY, }, @@ -2214,6 +2234,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64MMFR2_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64MMFR2_CNP_SHIFT, + .field_width = 4, .min_field_value = 1, .cpu_enable = cpu_enable_cnp, }, @@ -2225,6 +2246,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR1_EL1, .field_pos = ID_AA64ISAR1_SB_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 1, }, @@ -2236,6 +2258,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64ISAR1_APA_SHIFT, + .field_width = 4, .min_field_value = ID_AA64ISAR1_APA_ARCHITECTED, .matches = has_address_auth_cpucap, }, @@ -2246,6 +2269,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64ISAR1_API_SHIFT, + .field_width = 4, .min_field_value = ID_AA64ISAR1_API_IMP_DEF, .matches = has_address_auth_cpucap, }, @@ -2261,6 +2285,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64ISAR1_GPA_SHIFT, + .field_width = 4, .min_field_value = ID_AA64ISAR1_GPA_ARCHITECTED, .matches = has_cpuid_feature, }, @@ -2271,6 +2296,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64ISAR1_GPI_SHIFT, + .field_width = 4, .min_field_value = ID_AA64ISAR1_GPI_IMP_DEF, .matches = has_cpuid_feature, }, @@ -2291,6 +2317,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = can_use_gic_priorities, .sys_reg = SYS_ID_AA64PFR0_EL1, .field_pos = ID_AA64PFR0_GIC_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 1, }, @@ -2302,6 +2329,7 @@ static const struct 
arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .sys_reg = SYS_ID_AA64MMFR2_EL1, .sign = FTR_UNSIGNED, + .field_width = 4, .field_pos = ID_AA64MMFR2_E0PD_SHIFT, .matches = has_cpuid_feature, .min_field_value = 1, @@ -2316,6 +2344,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR0_EL1, .field_pos = ID_AA64ISAR0_RNDR_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 1, }, @@ -2333,6 +2362,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = bti_enable, .sys_reg = SYS_ID_AA64PFR1_EL1, .field_pos = ID_AA64PFR1_BT_SHIFT, + .field_width = 4, .min_field_value = ID_AA64PFR1_BT_BTI, .sign = FTR_UNSIGNED, }, @@ -2345,6 +2375,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64PFR1_EL1, .field_pos = ID_AA64PFR1_MTE_SHIFT, + .field_width = 4, .min_field_value = ID_AA64PFR1_MTE, .sign = FTR_UNSIGNED, .cpu_enable = cpu_enable_mte, @@ -2356,6 +2387,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64PFR1_EL1, .field_pos = ID_AA64PFR1_MTE_SHIFT, + .field_width = 4, .min_field_value = ID_AA64PFR1_MTE_ASYMM, .sign = FTR_UNSIGNED, }, @@ -2367,16 +2399,18 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64ISAR1_LRCPC_SHIFT, + .field_width = 4, .matches = has_cpuid_feature, .min_field_value = 1, }, {}, }; -#define HWCAP_CPUID_MATCH(reg, field, s, min_value) \ +#define HWCAP_CPUID_MATCH(reg, field, width, s, min_value) \ .matches = has_cpuid_feature, \ .sys_reg = reg, \ .field_pos = field, \ + .field_width = width, \ .sign = s, \ .min_field_value = min_value, @@ -2386,10 +2420,10 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .hwcap_type = cap_type, \ .hwcap = cap, \ -#define 
HWCAP_CAP(reg, field, s, min_value, cap_type, cap) \ +#define HWCAP_CAP(reg, field, width, s, min_value, cap_type, cap) \ { \ __HWCAP_CAP(#cap, cap_type, cap) \ - HWCAP_CPUID_MATCH(reg, field, s, min_value) \ + HWCAP_CPUID_MATCH(reg, field, width, s, min_value) \ } #define HWCAP_MULTI_CAP(list, cap_type, cap) \ @@ -2409,11 +2443,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = { static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = { { HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_APA_SHIFT, - FTR_UNSIGNED, ID_AA64ISAR1_APA_ARCHITECTED) + 4, FTR_UNSIGNED, + ID_AA64ISAR1_APA_ARCHITECTED) }, { HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_API_SHIFT, - FTR_UNSIGNED, ID_AA64ISAR1_API_IMP_DEF) + 4, FTR_UNSIGNED, ID_AA64ISAR1_API_IMP_DEF) }, {}, }; @@ -2421,77 +2456,77 @@ static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = { static const struct arm64_cpu_capabilities ptr_auth_hwcap_gen_matches[] = { { HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_GPA_SHIFT, - FTR_UNSIGNED, ID_AA64ISAR1_GPA_ARCHITECTED) + 4, FTR_UNSIGNED, ID_AA64ISAR1_GPA_ARCHITECTED) }, { HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_GPI_SHIFT, - FTR_UNSIGNED, ID_AA64ISAR1_GPI_IMP_DEF) + 4, FTR_UNSIGNED, ID_AA64ISAR1_GPI_IMP_DEF) }, {}, }; #endif static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_PMULL), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AES), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA1), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA2), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_SHA512), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, 
CAP_HWCAP, KERNEL_HWCAP_CRC32), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ATOMICS), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDRDM), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA3), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM3), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM4), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RNDR_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RNG), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DIT), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_DCPODP), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, 
ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FRINTTS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_BF16_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_BF16), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DGH_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DGH), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_I8MM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_I8MM), - HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_PMULL), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AES), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA1), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA2), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_SHA512), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_CRC32), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ATOMICS), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDRDM), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA3), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, 4, 
FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM3), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM4), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RNDR_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RNG), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DIT), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_DCPODP), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FRINTTS_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, 
KERNEL_HWCAP_FRINT), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_BF16_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_BF16), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DGH_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DGH), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_I8MM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_I8MM), + HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT), #ifdef CONFIG_ARM64_SVE - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SVEVER_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SVEVER_SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_AES, CAP_HWCAP, KERNEL_HWCAP_SVEAES), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_AES_PMULL, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BITPERM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_BITPERM, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BF16_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_BF16, CAP_HWCAP, KERNEL_HWCAP_SVEBF16), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SHA3_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SHA3, CAP_HWCAP, KERNEL_HWCAP_SVESHA3), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SM4_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SM4, CAP_HWCAP, KERNEL_HWCAP_SVESM4), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_I8MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_I8MM, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F32MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_F32MM, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F64MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_F64MM, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, 
ID_AA64PFR0_SVE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SVEVER_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SVEVER_SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_AES, CAP_HWCAP, KERNEL_HWCAP_SVEAES), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_AES_PMULL, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BITPERM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_BITPERM, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BF16_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_BF16, CAP_HWCAP, KERNEL_HWCAP_SVEBF16), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SHA3_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SHA3, CAP_HWCAP, KERNEL_HWCAP_SVESHA3), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SM4_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SM4, CAP_HWCAP, KERNEL_HWCAP_SVESM4), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_I8MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_I8MM, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F32MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_F32MM, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F64MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_F64MM, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM), #endif - HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS), + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS), #ifdef CONFIG_ARM64_BTI - HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_BT_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_BT_BTI, CAP_HWCAP, KERNEL_HWCAP_BTI), + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_BT_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_BT_BTI, CAP_HWCAP, KERNEL_HWCAP_BTI), #endif #ifdef CONFIG_ARM64_PTR_AUTH HWCAP_MULTI_CAP(ptr_auth_hwcap_addr_matches, CAP_HWCAP, 
KERNEL_HWCAP_PACA), HWCAP_MULTI_CAP(ptr_auth_hwcap_gen_matches, CAP_HWCAP, KERNEL_HWCAP_PACG), #endif #ifdef CONFIG_ARM64_MTE - HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE), + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE), #endif /* CONFIG_ARM64_MTE */ - HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV), - HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP), - HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_RPRES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES), + HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV), + HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP), + HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_RPRES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES), {}, }; @@ -2520,15 +2555,15 @@ static bool compat_has_neon(const struct arm64_cpu_capabilities *cap, int scope) static const struct arm64_cpu_capabilities compat_elf_hwcaps[] = { #ifdef CONFIG_COMPAT HWCAP_CAP_MATCH(compat_has_neon, CAP_COMPAT_HWCAP, COMPAT_HWCAP_NEON), - HWCAP_CAP(SYS_MVFR1_EL1, MVFR1_SIMDFMAC_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv4), + HWCAP_CAP(SYS_MVFR1_EL1, MVFR1_SIMDFMAC_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv4), /* Arm v8 mandates MVFR0.FPDP == {0, 2}. 
So, piggy back on this for the presence of VFP support */ - HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFP), - HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv3), - HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL), - HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES), - HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1), - HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2), - HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32), + HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFP), + HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv3), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32), #endif {}, }; -- GitLab From 32de73e89099c3f243032a733d3a64d417327a70 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 7 Feb 2022 15:20:34 +0000 Subject: [PATCH 0657/1586] kselftest/arm64: signal: Allow tests to be incompatible with features Some features may invalidate some tests, for example by supporting an operation which would trap otherwise. 
Allow tests to list features that they are incompatible with so we can cover the case where a signal will be generated without disruption on systems where that won't happen. Signed-off-by: Mark Brown Reviewed-by: Shuah Khan Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20220207152109.197566-6-broonie@kernel.org Signed-off-by: Will Deacon --- .../selftests/arm64/signal/test_signals.h | 1 + .../arm64/signal/test_signals_utils.c | 34 ++++++++++++++----- .../arm64/signal/test_signals_utils.h | 2 ++ 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/arm64/signal/test_signals.h b/tools/testing/selftests/arm64/signal/test_signals.h index ebe8694dbef0f..f909b70d9e980 100644 --- a/tools/testing/selftests/arm64/signal/test_signals.h +++ b/tools/testing/selftests/arm64/signal/test_signals.h @@ -53,6 +53,7 @@ struct tdescr { char *name; char *descr; unsigned long feats_required; + unsigned long feats_incompatible; /* bitmask of effectively supported feats: populated at run-time */ unsigned long feats_supported; bool initialized; diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c index 2f8c23af3b5e0..5743897984b0e 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.c +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c @@ -36,6 +36,8 @@ static inline char *feats_to_string(unsigned long feats) { size_t flen = MAX_FEATS_SZ - 1; + feats_string[0] = '\0'; + for (int i = 0; i < FMAX_END; i++) { if (feats & (1UL << i)) { size_t tlen = strlen(feats_names[i]); @@ -256,7 +258,7 @@ int test_init(struct tdescr *td) td->minsigstksz = MINSIGSTKSZ; fprintf(stderr, "Detected MINSTKSIGSZ:%d\n", td->minsigstksz); - if (td->feats_required) { + if (td->feats_required || td->feats_incompatible) { td->feats_supported = 0; /* * Checking for CPU required features using both the @@ -267,15 +269,29 @@ int test_init(struct tdescr *td) if 
(getauxval(AT_HWCAP) & HWCAP_SVE) td->feats_supported |= FEAT_SVE; if (feats_ok(td)) { - fprintf(stderr, - "Required Features: [%s] supported\n", - feats_to_string(td->feats_required & - td->feats_supported)); + if (td->feats_required & td->feats_supported) + fprintf(stderr, + "Required Features: [%s] supported\n", + feats_to_string(td->feats_required & + td->feats_supported)); + if (!(td->feats_incompatible & td->feats_supported)) + fprintf(stderr, + "Incompatible Features: [%s] absent\n", + feats_to_string(td->feats_incompatible)); } else { - fprintf(stderr, - "Required Features: [%s] NOT supported\n", - feats_to_string(td->feats_required & - ~td->feats_supported)); + if ((td->feats_required & td->feats_supported) != + td->feats_supported) + fprintf(stderr, + "Required Features: [%s] NOT supported\n", + feats_to_string(td->feats_required & + ~td->feats_supported)); + if (td->feats_incompatible & td->feats_supported) + fprintf(stderr, + "Incompatible Features: [%s] supported\n", + feats_to_string(td->feats_incompatible & + ~td->feats_supported)); + + td->result = KSFT_SKIP; return 0; } diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.h b/tools/testing/selftests/arm64/signal/test_signals_utils.h index 6772b5c8d274d..f3aa99ba67bb7 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.h +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.h @@ -18,6 +18,8 @@ void test_result(struct tdescr *td); static inline bool feats_ok(struct tdescr *td) { + if (td->feats_incompatible & td->feats_supported) + return false; return (td->feats_required & td->feats_supported) == td->feats_required; } -- GitLab From 3f9ab2a6986fe2930782408cc2a6c25e3e5cb98f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 16 Feb 2022 17:32:21 +0000 Subject: [PATCH 0658/1586] arm64/mte: Document ABI for asymmetric mode MTE3 adds a new mode which is synchronous for reads but asynchronous for writes. 
Document the userspace ABI for this feature, we call the new mode ASYMM and add a new prctl flag and mte_tcf_preferred value for it. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220216173224.2342152-2-broonie@kernel.org Signed-off-by: Will Deacon --- .../arm64/memory-tagging-extension.rst | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/Documentation/arm64/memory-tagging-extension.rst b/Documentation/arm64/memory-tagging-extension.rst index 7b99c8f428eb6..42677d48a114b 100644 --- a/Documentation/arm64/memory-tagging-extension.rst +++ b/Documentation/arm64/memory-tagging-extension.rst @@ -76,6 +76,9 @@ configurable behaviours: with ``.si_code = SEGV_MTEAERR`` and ``.si_addr = 0`` (the faulting address is unknown). +- *Asymmetric* - Reads are handled as for synchronous mode while writes + are handled as for asynchronous mode. + The user can select the above modes, per thread, using the ``prctl(PR_SET_TAGGED_ADDR_CTRL, flags, 0, 0, 0)`` system call where ``flags`` contains any number of the following values in the ``PR_MTE_TCF_MASK`` @@ -85,6 +88,7 @@ bit-field: (ignored if combined with other options) - ``PR_MTE_TCF_SYNC`` - *Synchronous* tag check fault mode - ``PR_MTE_TCF_ASYNC`` - *Asynchronous* tag check fault mode +- ``PR_MTE_TCF_ASYMM`` - *Asymmetric* tag check fault mode If no modes are specified, tag check faults are ignored. If a single mode is specified, the program will run in that mode. If multiple @@ -139,18 +143,23 @@ tag checking mode as the CPU's preferred tag checking mode. The preferred tag checking mode for each CPU is controlled by ``/sys/devices/system/cpu/cpu/mte_tcf_preferred``, to which a -privileged user may write the value ``async`` or ``sync``. The default -preferred mode for each CPU is ``async``. +privileged user may write the value ``async``, ``sync`` or ``asymm``. The +default preferred mode for each CPU is ``async``. 
To allow a program to potentially run in the CPU's preferred tag checking mode, the user program may set multiple tag check fault mode bits in the ``flags`` argument to the ``prctl(PR_SET_TAGGED_ADDR_CTRL, flags, 0, 0, 0)`` system call. If the CPU's preferred tag checking -mode is in the task's set of provided tag checking modes (this will -always be the case at present because the kernel only supports two -tag checking modes, but future kernels may support more modes), that +mode is in the task's set of provided tag checking modes, that mode will be selected. Otherwise, one of the modes in the task's mode -set will be selected in a currently unspecified manner. +set will be selected by the kernel using the preference order: + + 1. Asynchronous + 2. Asymmetric + 3. Synchronous + +If asymmetric mode is specified by the program but not supported by +either the system or the kernel then an error will be returned. Initial process state --------------------- -- GitLab From cb627397e02bc65e44912daebfe0bbe6b0ecd384 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 16 Feb 2022 17:32:22 +0000 Subject: [PATCH 0659/1586] arm64/mte: Add a little bit of documentation for mte_update_sctlr_user() The code isn't that obscure but it probably won't hurt to have a little bit more documentation for anyone trying to find out where everything actually takes effect.
Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Reviewed-by: Vincenzo Frascino Tested-by: Branislav Rankov Link: https://lore.kernel.org/r/20220216173224.2342152-3-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/mte.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index f983795b5eda2..b9a2d13e85f6d 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -186,6 +186,11 @@ void mte_check_tfsr_el1(void) } #endif +/* + * This is where we actually resolve the system and process MTE mode + * configuration into an actual value in SCTLR_EL1 that affects + * userspace. + */ static void mte_update_sctlr_user(struct task_struct *task) { /* @@ -199,8 +204,17 @@ static void mte_update_sctlr_user(struct task_struct *task) unsigned long pref, resolved_mte_tcf; pref = __this_cpu_read(mte_tcf_preferred); + /* + * If there is no overlap between the system preferred and + * program requested values go with what was requested. + */ resolved_mte_tcf = (mte_ctrl & pref) ? pref : mte_ctrl; sctlr &= ~SCTLR_EL1_TCF0_MASK; + /* + * Pick an actual setting. The order in which we check for + * set bits and map into register values determines our + * default order. + */ if (resolved_mte_tcf & MTE_CTRL_TCF_ASYNC) sctlr |= SCTLR_EL1_TCF0_ASYNC; else if (resolved_mte_tcf & MTE_CTRL_TCF_SYNC) -- GitLab From d082a0255fcb8fcb4bd8257df111f2caa67086bc Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 16 Feb 2022 17:32:23 +0000 Subject: [PATCH 0660/1586] arm64/mte: Add hwcap for asymmetric mode Allow userspace to detect support for asymmetric mode by providing a hwcap for it, using the official feature name FEAT_MTE3. 
Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Reviewed-by: Vincenzo Frascino Tested-by: Branislav Rankov Link: https://lore.kernel.org/r/20220216173224.2342152-4-broonie@kernel.org Signed-off-by: Will Deacon --- Documentation/arm64/elf_hwcaps.rst | 5 +++++ arch/arm64/include/asm/hwcap.h | 1 + arch/arm64/include/uapi/asm/hwcap.h | 1 + arch/arm64/kernel/cpufeature.c | 1 + arch/arm64/kernel/cpuinfo.c | 1 + 5 files changed, 9 insertions(+) diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst index b72ff17d600ae..a8f30963e550d 100644 --- a/Documentation/arm64/elf_hwcaps.rst +++ b/Documentation/arm64/elf_hwcaps.rst @@ -259,6 +259,11 @@ HWCAP2_RPRES Functionality implied by ID_AA64ISAR2_EL1.RPRES == 0b0001. +HWCAP2_MTE3 + + Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0011, as described + by Documentation/arm64/memory-tagging-extension.rst. + 4. Unused AT_HWCAP bits ----------------------- diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index f68fbb2074730..8db5ec0089dbd 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -108,6 +108,7 @@ #define KERNEL_HWCAP_ECV __khwcap2_feature(ECV) #define KERNEL_HWCAP_AFP __khwcap2_feature(AFP) #define KERNEL_HWCAP_RPRES __khwcap2_feature(RPRES) +#define KERNEL_HWCAP_MTE3 __khwcap2_feature(MTE3) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index f03731847d9df..99cb5d383048d 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -78,5 +78,6 @@ #define HWCAP2_ECV (1 << 19) #define HWCAP2_AFP (1 << 20) #define HWCAP2_RPRES (1 << 21) +#define HWCAP2_MTE3 (1 << 22) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index e5f23dab1c8df..5809d5d59258f 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c 
@@ -2488,6 +2488,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { #endif #ifdef CONFIG_ARM64_MTE HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE), + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_MTE_ASYMM, CAP_HWCAP, KERNEL_HWCAP_MTE3), #endif /* CONFIG_ARM64_MTE */ HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV), HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP), diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 591c18a889a56..330b92ea863aa 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -97,6 +97,7 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_ECV] = "ecv", [KERNEL_HWCAP_AFP] = "afp", [KERNEL_HWCAP_RPRES] = "rpres", + [KERNEL_HWCAP_MTE3] = "mte3", }; #ifdef CONFIG_COMPAT -- GitLab From 766121ba5de38a6f67980ec24a6af76c55def100 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 16 Feb 2022 17:32:24 +0000 Subject: [PATCH 0661/1586] arm64/mte: Add userspace interface for enabling asymmetric mode The architecture provides an asymmetric mode for MTE where tag mismatches are checked asynchronously for stores but synchronously for loads. Allow userspace processes to select this and make it available as a default mode via the existing per-CPU sysfs interface. Since the PR_MTE_TCF_ values are a bitmask (allowing the kernel to choose between the multiple modes) and there are no free bits adjacent to the existing PR_MTE_TCF_ bits the set of bits used to specify the mode becomes disjoint. Programs using the new interface should be aware of this and programs that do not use it will not see any change in behaviour.
When userspace requests two possible modes but the system default for the CPU is the third mode (eg, default is synchronous but userspace requests either asynchronous or asymmetric) the preference order is: ASYMM > ASYNC > SYNC This situation is not currently possible since there are only two modes and it is mandatory to have a system default so there could be no ambiguity and there is no ABI change. The chosen order is basically arbitrary as we do not have a clear metric for what is better here. If userspace requests specifically asymmetric mode via the prctl() and the system does not support it then we will return an error, this mirrors how we handle the case where userspace enables MTE on a system that does not support MTE at all and the behaviour that will be seen if running on an older kernel that does not support userspace use of asymmetric mode. Attempts to set asymmetric mode as the default mode will result in an error if the system does not support it. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Reviewed-by: Vincenzo Frascino Tested-by: Branislav Rankov Link: https://lore.kernel.org/r/20220216173224.2342152-5-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/processor.h | 1 + arch/arm64/kernel/mte.c | 12 +++++++++++- arch/arm64/kernel/process.c | 5 ++++- include/uapi/linux/prctl.h | 4 +++- 4 files changed, 19 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 6f41b65f99628..73e38d9a540ce 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -21,6 +21,7 @@ #define MTE_CTRL_TCF_SYNC (1UL << 16) #define MTE_CTRL_TCF_ASYNC (1UL << 17) +#define MTE_CTRL_TCF_ASYMM (1UL << 18) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index b9a2d13e85f6d..cbbd8d93fc502 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -215,7 +215,9 @@ static void mte_update_sctlr_user(struct 
task_struct *task) * set bits and map into register values determines our * default order. */ - if (resolved_mte_tcf & MTE_CTRL_TCF_ASYNC) + if (resolved_mte_tcf & MTE_CTRL_TCF_ASYMM) + sctlr |= SCTLR_EL1_TCF0_ASYMM; + else if (resolved_mte_tcf & MTE_CTRL_TCF_ASYNC) sctlr |= SCTLR_EL1_TCF0_ASYNC; else if (resolved_mte_tcf & MTE_CTRL_TCF_SYNC) sctlr |= SCTLR_EL1_TCF0_SYNC; @@ -309,6 +311,8 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg) mte_ctrl |= MTE_CTRL_TCF_ASYNC; if (arg & PR_MTE_TCF_SYNC) mte_ctrl |= MTE_CTRL_TCF_SYNC; + if (arg & PR_MTE_TCF_ASYMM) + mte_ctrl |= MTE_CTRL_TCF_ASYMM; task->thread.mte_ctrl = mte_ctrl; if (task == current) { @@ -337,6 +341,8 @@ long get_mte_ctrl(struct task_struct *task) ret |= PR_MTE_TCF_ASYNC; if (mte_ctrl & MTE_CTRL_TCF_SYNC) ret |= PR_MTE_TCF_SYNC; + if (mte_ctrl & MTE_CTRL_TCF_ASYMM) + ret |= PR_MTE_TCF_ASYMM; return ret; } @@ -484,6 +490,8 @@ static ssize_t mte_tcf_preferred_show(struct device *dev, return sysfs_emit(buf, "async\n"); case MTE_CTRL_TCF_SYNC: return sysfs_emit(buf, "sync\n"); + case MTE_CTRL_TCF_ASYMM: + return sysfs_emit(buf, "asymm\n"); default: return sysfs_emit(buf, "???\n"); } @@ -499,6 +507,8 @@ static ssize_t mte_tcf_preferred_store(struct device *dev, tcf = MTE_CTRL_TCF_ASYNC; else if (sysfs_streq(buf, "sync")) tcf = MTE_CTRL_TCF_SYNC; + else if (cpus_have_cap(ARM64_MTE_ASYMM) && sysfs_streq(buf, "asymm")) + tcf = MTE_CTRL_TCF_ASYMM; else return -EINVAL; diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 5369e649fa79f..941cfa7117b9b 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -635,7 +635,10 @@ long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg) return -EINVAL; if (system_supports_mte()) - valid_mask |= PR_MTE_TCF_MASK | PR_MTE_TAG_MASK; + valid_mask |= PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC \ + | PR_MTE_TAG_MASK; + if (cpus_have_cap(ARM64_MTE_ASYMM)) + valid_mask |= PR_MTE_TCF_ASYMM; if (arg & ~valid_mask) 
return -EINVAL; diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index e998764f02625..4ae2b21e4066a 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -238,7 +238,9 @@ struct prctl_mm_map { # define PR_MTE_TCF_NONE 0UL # define PR_MTE_TCF_SYNC (1UL << 1) # define PR_MTE_TCF_ASYNC (1UL << 2) -# define PR_MTE_TCF_MASK (PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC) +# define PR_MTE_TCF_ASYMM (1UL << 19) +# define PR_MTE_TCF_MASK (PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC | \ + PR_MTE_TCF_ASYMM) /* MTE tag inclusion mask */ # define PR_MTE_TAG_SHIFT 3 # define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) -- GitLab From 2fef99b8372c1ae3d8445ab570e888b5a358dbe9 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 11 Feb 2022 18:56:46 -0800 Subject: [PATCH 0662/1586] f2fs: fix missing free nid in f2fs_handle_failed_inode This patch fixes xfstests/generic/475 failure. [ 293.680694] F2FS-fs (dm-1): May loss orphan inode, run fsck to fix. [ 293.685358] Buffer I/O error on dev dm-1, logical block 8388592, async page read [ 293.691527] Buffer I/O error on dev dm-1, logical block 8388592, async page read [ 293.691764] sh (7615): drop_caches: 3 [ 293.691819] sh (7616): drop_caches: 3 [ 293.694017] Buffer I/O error on dev dm-1, logical block 1, async page read [ 293.695659] sh (7618): drop_caches: 3 [ 293.696979] sh (7617): drop_caches: 3 [ 293.700290] sh (7623): drop_caches: 3 [ 293.708621] sh (7626): drop_caches: 3 [ 293.711386] sh (7628): drop_caches: 3 [ 293.711825] sh (7627): drop_caches: 3 [ 293.716738] sh (7630): drop_caches: 3 [ 293.719613] sh (7632): drop_caches: 3 [ 293.720971] sh (7633): drop_caches: 3 [ 293.727741] sh (7634): drop_caches: 3 [ 293.730783] sh (7636): drop_caches: 3 [ 293.732681] sh (7635): drop_caches: 3 [ 293.732988] sh (7637): drop_caches: 3 [ 293.738836] sh (7639): drop_caches: 3 [ 293.740568] sh (7641): drop_caches: 3 [ 293.743053] sh (7640): drop_caches: 3 [ 293.821889] ------------[ cut here ]------------ [ 
293.824654] kernel BUG at fs/f2fs/node.c:3334! [ 293.826226] invalid opcode: 0000 [#1] PREEMPT SMP PTI [ 293.828713] CPU: 0 PID: 7653 Comm: umount Tainted: G OE 5.17.0-rc1-custom #1 [ 293.830946] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 [ 293.832526] RIP: 0010:f2fs_destroy_node_manager+0x33f/0x350 [f2fs] [ 293.833905] Code: e8 d6 3d f9 f9 48 8b 45 d0 65 48 2b 04 25 28 00 00 00 75 1a 48 81 c4 28 03 00 00 5b 41 5c 41 5d 41 5e 41 5f 5d c3 0f 0b [ 293.837783] RSP: 0018:ffffb04ec31e7a20 EFLAGS: 00010202 [ 293.839062] RAX: 0000000000000001 RBX: ffff9df947db2eb8 RCX: 0000000080aa0072 [ 293.840666] RDX: 0000000000000000 RSI: ffffe86c0432a140 RDI: ffffffffc0b72a21 [ 293.842261] RBP: ffffb04ec31e7d70 R08: ffff9df94ca85780 R09: 0000000080aa0072 [ 293.843909] R10: ffff9df94ca85700 R11: ffff9df94e1ccf58 R12: ffff9df947db2e00 [ 293.845594] R13: ffff9df947db2ed0 R14: ffff9df947db2eb8 R15: ffff9df947db2eb8 [ 293.847855] FS: 00007f5a97379800(0000) GS:ffff9dfa77c00000(0000) knlGS:0000000000000000 [ 293.850647] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 293.852940] CR2: 00007f5a97528730 CR3: 000000010bc76005 CR4: 0000000000370ef0 [ 293.854680] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 293.856423] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 293.858380] Call Trace: [ 293.859302] [ 293.860311] ? ttwu_do_wakeup+0x1c/0x170 [ 293.861800] ? ttwu_do_activate+0x6d/0xb0 [ 293.863057] ? _raw_spin_unlock_irqrestore+0x29/0x40 [ 293.864411] ? try_to_wake_up+0x9d/0x5e0 [ 293.865618] ? debug_smp_processor_id+0x17/0x20 [ 293.866934] ? debug_smp_processor_id+0x17/0x20 [ 293.868223] ? free_unref_page+0xbf/0x120 [ 293.869470] ? __free_slab+0xcb/0x1c0 [ 293.870614] ? preempt_count_add+0x7a/0xc0 [ 293.871811] ? __slab_free+0xa0/0x2d0 [ 293.872918] ? __wake_up_common_lock+0x8a/0xc0 [ 293.874186] ? __slab_free+0xa0/0x2d0 [ 293.875305] ? free_inode_nonrcu+0x20/0x20 [ 293.876466] ? 
free_inode_nonrcu+0x20/0x20 [ 293.877650] ? debug_smp_processor_id+0x17/0x20 [ 293.878949] ? call_rcu+0x11a/0x240 [ 293.880060] ? f2fs_destroy_stats+0x59/0x60 [f2fs] [ 293.881437] ? kfree+0x1fe/0x230 [ 293.882674] f2fs_put_super+0x160/0x390 [f2fs] [ 293.883978] generic_shutdown_super+0x7a/0x120 [ 293.885274] kill_block_super+0x27/0x50 [ 293.886496] kill_f2fs_super+0x7f/0x100 [f2fs] [ 293.887806] deactivate_locked_super+0x35/0xa0 [ 293.889271] deactivate_super+0x40/0x50 [ 293.890513] cleanup_mnt+0x139/0x190 [ 293.891689] __cleanup_mnt+0x12/0x20 [ 293.892850] task_work_run+0x64/0xa0 [ 293.894035] exit_to_user_mode_prepare+0x1b7/0x1c0 [ 293.895409] syscall_exit_to_user_mode+0x27/0x50 [ 293.896872] do_syscall_64+0x48/0xc0 [ 293.898090] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 293.899517] RIP: 0033:0x7f5a975cd25b Fixes: 7735730d39d7 ("f2fs: fix to propagate error from __get_meta_page()") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 0ec8e32a00b47..ab8e0c06c78c4 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -885,6 +885,7 @@ void f2fs_handle_failed_inode(struct inode *inode) err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false); if (err) { set_sbi_flag(sbi, SBI_NEED_FSCK); + set_inode_flag(inode, FI_FREE_NID); f2fs_warn(sbi, "May loss orphan inode, run fsck to fix."); goto out; } -- GitLab From c7f91bd4102902384137dd5c50d04bfed27050dd Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 22 Feb 2022 10:43:13 -0800 Subject: [PATCH 0663/1586] f2fs: Restore rwsem lockdep support Lockdep uses lock class keys in its analysis. 
init_rwsem() instantiates one lock class key with each init_rwsem() user as follows: #define init_rwsem(sem) \ do { \ static struct lock_class_key __key; \ \ __init_rwsem((sem), #sem, &__key); \ } while (0) Commit e4544b63a7ee ("f2fs: move f2fs to use reader-unfair rwsems") reduced the number of lock class keys from one per init_rwsem() user to one per file in which init_f2fs_rwsem() is used. This causes the same lock class key to be associated with multiple f2fs rwsems and also triggers a number of false positive lockdep deadlock reports. Fix this by again instantiating one lock class key with each init_f2fs_rwsem() caller. Cc: Tim Murray Reported-by: syzbot+0b9cadf5fc45a98a5083@syzkaller.appspotmail.com Fixes: e4544b63a7ee ("f2fs: move f2fs to use reader-unfair rwsems") Signed-off-by: Bart Van Assche Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c9515c3c54fd5..47bf9e30913f9 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2115,9 +2115,17 @@ static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f) spin_unlock_irqrestore(&sbi->cp_lock, flags); } -static inline void init_f2fs_rwsem(struct f2fs_rwsem *sem) +#define init_f2fs_rwsem(sem) \ +do { \ + static struct lock_class_key __key; \ + \ + __init_f2fs_rwsem((sem), #sem, &__key); \ +} while (0) + +static inline void __init_f2fs_rwsem(struct f2fs_rwsem *sem, + const char *sem_name, struct lock_class_key *key) { - init_rwsem(&sem->internal_rwsem); + __init_rwsem(&sem->internal_rwsem, sem_name, key); init_waitqueue_head(&sem->read_waiters); } -- GitLab From 680af5b824a52faa819167628665804a14f0e0df Mon Sep 17 00:00:00 2001 From: Juhyung Park Date: Tue, 15 Feb 2022 17:27:21 +0900 Subject: [PATCH 0664/1586] f2fs: quota: fix loop condition at f2fs_quota_sync() cnt should be passed to sb_has_quota_active() instead of type to check active quota properly. 
Moreover, when the type is -1, the compiler with enough inline knowledge can discard sb_has_quota_active() check altogether, causing a NULL pointer dereference at the following inode_lock(dqopt->files[cnt]): [ 2.796010] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000a0 [ 2.796024] Mem abort info: [ 2.796025] ESR = 0x96000005 [ 2.796028] EC = 0x25: DABT (current EL), IL = 32 bits [ 2.796029] SET = 0, FnV = 0 [ 2.796031] EA = 0, S1PTW = 0 [ 2.796032] Data abort info: [ 2.796034] ISV = 0, ISS = 0x00000005 [ 2.796035] CM = 0, WnR = 0 [ 2.796046] user pgtable: 4k pages, 39-bit VAs, pgdp=00000003370d1000 [ 2.796048] [00000000000000a0] pgd=0000000000000000, pud=0000000000000000 [ 2.796051] Internal error: Oops: 96000005 [#1] PREEMPT SMP [ 2.796056] CPU: 7 PID: 640 Comm: f2fs_ckpt-259:7 Tainted: G S 5.4.179-arter97-r8-64666-g2f16e087f9d8 #1 [ 2.796057] Hardware name: Qualcomm Technologies, Inc. Lahaina MTP lemonadep (DT) [ 2.796059] pstate: 80c00005 (Nzcv daif +PAN +UAO) [ 2.796065] pc : down_write+0x28/0x70 [ 2.796070] lr : f2fs_quota_sync+0x100/0x294 [ 2.796071] sp : ffffffa3f48ffc30 [ 2.796073] x29: ffffffa3f48ffc30 x28: 0000000000000000 [ 2.796075] x27: ffffffa3f6d718b8 x26: ffffffa415fe9d80 [ 2.796077] x25: ffffffa3f7290048 x24: 0000000000000001 [ 2.796078] x23: 0000000000000000 x22: ffffffa3f7290000 [ 2.796080] x21: ffffffa3f72904a0 x20: ffffffa3f7290110 [ 2.796081] x19: ffffffa3f77a9800 x18: ffffffc020aae038 [ 2.796083] x17: ffffffa40e38e040 x16: ffffffa40e38e6d0 [ 2.796085] x15: ffffffa40e38e6cc x14: ffffffa40e38e6d0 [ 2.796086] x13: 00000000000004f6 x12: 00162c44ff493000 [ 2.796088] x11: 0000000000000400 x10: ffffffa40e38c948 [ 2.796090] x9 : 0000000000000000 x8 : 00000000000000a0 [ 2.796091] x7 : 0000000000000000 x6 : 0000d1060f00002a [ 2.796093] x5 : ffffffa3f48ff718 x4 : 000000000000000d [ 2.796094] x3 : 00000000060c0000 x2 : 0000000000000001 [ 2.796096] x1 : 0000000000000000 x0 : 00000000000000a0 [ 2.796098] Call trace: [ 
2.796100] down_write+0x28/0x70 [ 2.796102] f2fs_quota_sync+0x100/0x294 [ 2.796104] block_operations+0x120/0x204 [ 2.796106] f2fs_write_checkpoint+0x11c/0x520 [ 2.796107] __checkpoint_and_complete_reqs+0x7c/0xd34 [ 2.796109] issue_checkpoint_thread+0x6c/0xb8 [ 2.796112] kthread+0x138/0x414 [ 2.796114] ret_from_fork+0x10/0x18 [ 2.796117] Code: aa0803e0 aa1f03e1 52800022 aa0103e9 (c8e97d02) [ 2.796120] ---[ end trace 96e942e8eb6a0b53 ]--- [ 2.800116] Kernel panic - not syncing: Fatal exception [ 2.800120] SMP: stopping secondary CPUs Fixes: 9de71ede81e6 ("f2fs: quota: fix potential deadlock") Cc: # v5.15+ Signed-off-by: Juhyung Park Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 22fb4d3b11702..8e3840973077a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2689,7 +2689,7 @@ int f2fs_quota_sync(struct super_block *sb, int type) struct f2fs_sb_info *sbi = F2FS_SB(sb); struct quota_info *dqopt = sb_dqopt(sb); int cnt; - int ret; + int ret = 0; /* * Now when everything is written we can discard the pagecache so @@ -2700,8 +2700,8 @@ int f2fs_quota_sync(struct super_block *sb, int type) if (type != -1 && cnt != type) continue; - if (!sb_has_quota_active(sb, type)) - return 0; + if (!sb_has_quota_active(sb, cnt)) + continue; inode_lock(dqopt->files[cnt]); -- GitLab From 9ddb00a2a136cc6ebbf6ee32fcf527d0d66044a2 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Fri, 11 Feb 2022 08:09:27 -0800 Subject: [PATCH 0665/1586] ACPI: fan: Fix error reporting to user space When user get/set cur_state fails, it should be some negative error value instead of whatever returned by acpi_evaluate_object() or from acpi_execute_simple_method(). The return value from these apis is some positive values greater than 0. For example if AE_NOT_FOUND is returned it will be "5". In other ACPI drivers, -ENODEV is returned when ACPI_FAILURE(status) is true. 
Do the same thing here for thermal sysfs callbacks for get and set for failures. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/acpi/fan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c index 5cd0ceb50bc8a..098d64568d6d4 100644 --- a/drivers/acpi/fan.c +++ b/drivers/acpi/fan.c @@ -107,7 +107,7 @@ static int fan_get_state_acpi4(struct acpi_device *device, unsigned long *state) status = acpi_evaluate_object(device->handle, "_FST", NULL, &buffer); if (ACPI_FAILURE(status)) { dev_err(&device->dev, "Get fan state failed\n"); - return status; + return -ENODEV; } obj = buffer.pointer; @@ -195,7 +195,7 @@ static int fan_set_state_acpi4(struct acpi_device *device, unsigned long state) fan->fps[state].control); if (ACPI_FAILURE(status)) { dev_dbg(&device->dev, "Failed to set state by _FSL\n"); - return status; + return -ENODEV; } return 0; -- GitLab From 00ae053a0533155d830da27070a931e6fa747327 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Fri, 11 Feb 2022 08:09:28 -0800 Subject: [PATCH 0666/1586] ACPI: fan: Separate file for attributes creation Move the functionality of creation of sysfs attributes under acpi device to a new file fan_attr.c. This cleans up the core fan code, which just use thermal sysfs interface. The original fan.c is renamed to fan_core.c. No functional changes are expected. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/Makefile | 3 + drivers/acpi/fan.h | 35 +++++++++++ drivers/acpi/fan_attr.c | 86 ++++++++++++++++++++++++++ drivers/acpi/{fan.c => fan_core.c} | 98 +++--------------------------- 4 files changed, 133 insertions(+), 89 deletions(-) create mode 100644 drivers/acpi/fan_attr.c rename drivers/acpi/{fan.c => fan_core.c} (80%) diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index bb757148e7ba0..b5a8d3e00a52b 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -81,6 +81,9 @@ obj-$(CONFIG_ACPI_AC) += ac.o obj-$(CONFIG_ACPI_BUTTON) += button.o obj-$(CONFIG_ACPI_TINY_POWER_BUTTON) += tiny-power-button.o obj-$(CONFIG_ACPI_FAN) += fan.o +fan-objs := fan_core.o +fan-objs += fan_attr.o + obj-$(CONFIG_ACPI_VIDEO) += video.o obj-$(CONFIG_ACPI_TAD) += acpi_tad.o obj-$(CONFIG_ACPI_PCI_SLOT) += pci_slot.o diff --git a/drivers/acpi/fan.h b/drivers/acpi/fan.h index dd9bb8ca22442..36c5e1a570944 100644 --- a/drivers/acpi/fan.h +++ b/drivers/acpi/fan.h @@ -6,9 +6,44 @@ * * Add new device IDs before the generic ACPI fan one. 
*/ + +#ifndef _ACPI_FAN_H_ +#define _ACPI_FAN_H_ + #define ACPI_FAN_DEVICE_IDS \ {"INT3404", }, /* Fan */ \ {"INTC1044", }, /* Fan for Tiger Lake generation */ \ {"INTC1048", }, /* Fan for Alder Lake generation */ \ {"INTC10A2", }, /* Fan for Raptor Lake generation */ \ {"PNP0C0B", } /* Generic ACPI fan */ + +#define ACPI_FPS_NAME_LEN 20 + +struct acpi_fan_fps { + u64 control; + u64 trip_point; + u64 speed; + u64 noise_level; + u64 power; + char name[ACPI_FPS_NAME_LEN]; + struct device_attribute dev_attr; +}; + +struct acpi_fan_fif { + u64 revision; + u64 fine_grain_ctrl; + u64 step_size; + u64 low_speed_notification; +}; + +struct acpi_fan { + bool acpi4; + struct acpi_fan_fif fif; + struct acpi_fan_fps *fps; + int fps_count; + struct thermal_cooling_device *cdev; +}; + +int acpi_fan_create_attributes(struct acpi_device *device); +void acpi_fan_delete_attributes(struct acpi_device *device); +#endif diff --git a/drivers/acpi/fan_attr.c b/drivers/acpi/fan_attr.c new file mode 100644 index 0000000000000..7b109022108b4 --- /dev/null +++ b/drivers/acpi/fan_attr.c @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * fan_attr.c - Create extra attributes for ACPI Fan driver + * + * Copyright (C) 2001, 2002 Andy Grover + * Copyright (C) 2001, 2002 Paul Diefenbaugh + * Copyright (C) 2022 Intel Corporation. All rights reserved. 
+ */ + +#include +#include +#include +#include + +#include "fan.h" + +MODULE_LICENSE("GPL"); + +static ssize_t show_state(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct acpi_fan_fps *fps = container_of(attr, struct acpi_fan_fps, dev_attr); + int count; + + if (fps->control == 0xFFFFFFFF || fps->control > 100) + count = scnprintf(buf, PAGE_SIZE, "not-defined:"); + else + count = scnprintf(buf, PAGE_SIZE, "%lld:", fps->control); + + if (fps->trip_point == 0xFFFFFFFF || fps->trip_point > 9) + count += scnprintf(&buf[count], PAGE_SIZE - count, "not-defined:"); + else + count += scnprintf(&buf[count], PAGE_SIZE - count, "%lld:", fps->trip_point); + + if (fps->speed == 0xFFFFFFFF) + count += scnprintf(&buf[count], PAGE_SIZE - count, "not-defined:"); + else + count += scnprintf(&buf[count], PAGE_SIZE - count, "%lld:", fps->speed); + + if (fps->noise_level == 0xFFFFFFFF) + count += scnprintf(&buf[count], PAGE_SIZE - count, "not-defined:"); + else + count += scnprintf(&buf[count], PAGE_SIZE - count, "%lld:", fps->noise_level * 100); + + if (fps->power == 0xFFFFFFFF) + count += scnprintf(&buf[count], PAGE_SIZE - count, "not-defined\n"); + else + count += scnprintf(&buf[count], PAGE_SIZE - count, "%lld\n", fps->power); + + return count; +} + +int acpi_fan_create_attributes(struct acpi_device *device) +{ + struct acpi_fan *fan = acpi_driver_data(device); + int i, status = 0; + + for (i = 0; i < fan->fps_count; ++i) { + struct acpi_fan_fps *fps = &fan->fps[i]; + + snprintf(fps->name, ACPI_FPS_NAME_LEN, "state%d", i); + sysfs_attr_init(&fps->dev_attr.attr); + fps->dev_attr.show = show_state; + fps->dev_attr.store = NULL; + fps->dev_attr.attr.name = fps->name; + fps->dev_attr.attr.mode = 0444; + status = sysfs_create_file(&device->dev.kobj, &fps->dev_attr.attr); + if (status) { + int j; + + for (j = 0; j < i; ++j) + sysfs_remove_file(&device->dev.kobj, &fan->fps[j].dev_attr.attr); + break; + } + } + + return status; +} + +void 
acpi_fan_delete_attributes(struct acpi_device *device) +{ + struct acpi_fan *fan = acpi_driver_data(device); + int i; + + for (i = 0; i < fan->fps_count; ++i) + sysfs_remove_file(&device->dev.kobj, &fan->fps[i].dev_attr.attr); +} diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan_core.c similarity index 80% rename from drivers/acpi/fan.c rename to drivers/acpi/fan_core.c index 098d64568d6d4..9f8e68403fad6 100644 --- a/drivers/acpi/fan.c +++ b/drivers/acpi/fan_core.c @@ -1,9 +1,10 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * acpi_fan.c - ACPI Fan Driver ($Revision: 29 $) + * fan_core.c - ACPI Fan core Driver * * Copyright (C) 2001, 2002 Andy Grover * Copyright (C) 2001, 2002 Paul Diefenbaugh + * Copyright (C) 2022 Intel Corporation. All rights reserved. */ #include @@ -45,33 +46,6 @@ static const struct dev_pm_ops acpi_fan_pm = { #define FAN_PM_OPS_PTR NULL #endif -#define ACPI_FPS_NAME_LEN 20 - -struct acpi_fan_fps { - u64 control; - u64 trip_point; - u64 speed; - u64 noise_level; - u64 power; - char name[ACPI_FPS_NAME_LEN]; - struct device_attribute dev_attr; -}; - -struct acpi_fan_fif { - u64 revision; - u64 fine_grain_ctrl; - u64 step_size; - u64 low_speed_notification; -}; - -struct acpi_fan { - bool acpi4; - struct acpi_fan_fif fif; - struct acpi_fan_fps *fps; - int fps_count; - struct thermal_cooling_device *cdev; -}; - static struct platform_driver acpi_fan_driver = { .probe = acpi_fan_probe, .remove = acpi_fan_remove, @@ -270,39 +244,6 @@ static int acpi_fan_speed_cmp(const void *a, const void *b) return fps1->speed - fps2->speed; } -static ssize_t show_state(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct acpi_fan_fps *fps = container_of(attr, struct acpi_fan_fps, dev_attr); - int count; - - if (fps->control == 0xFFFFFFFF || fps->control > 100) - count = scnprintf(buf, PAGE_SIZE, "not-defined:"); - else - count = scnprintf(buf, PAGE_SIZE, "%lld:", fps->control); - - if (fps->trip_point == 0xFFFFFFFF || fps->trip_point > 
9) - count += scnprintf(&buf[count], PAGE_SIZE - count, "not-defined:"); - else - count += scnprintf(&buf[count], PAGE_SIZE - count, "%lld:", fps->trip_point); - - if (fps->speed == 0xFFFFFFFF) - count += scnprintf(&buf[count], PAGE_SIZE - count, "not-defined:"); - else - count += scnprintf(&buf[count], PAGE_SIZE - count, "%lld:", fps->speed); - - if (fps->noise_level == 0xFFFFFFFF) - count += scnprintf(&buf[count], PAGE_SIZE - count, "not-defined:"); - else - count += scnprintf(&buf[count], PAGE_SIZE - count, "%lld:", fps->noise_level * 100); - - if (fps->power == 0xFFFFFFFF) - count += scnprintf(&buf[count], PAGE_SIZE - count, "not-defined\n"); - else - count += scnprintf(&buf[count], PAGE_SIZE - count, "%lld\n", fps->power); - - return count; -} - static int acpi_fan_get_fps(struct acpi_device *device) { struct acpi_fan *fan = acpi_driver_data(device); @@ -347,25 +288,6 @@ static int acpi_fan_get_fps(struct acpi_device *device) sort(fan->fps, fan->fps_count, sizeof(*fan->fps), acpi_fan_speed_cmp, NULL); - for (i = 0; i < fan->fps_count; ++i) { - struct acpi_fan_fps *fps = &fan->fps[i]; - - snprintf(fps->name, ACPI_FPS_NAME_LEN, "state%d", i); - sysfs_attr_init(&fps->dev_attr.attr); - fps->dev_attr.show = show_state; - fps->dev_attr.store = NULL; - fps->dev_attr.attr.name = fps->name; - fps->dev_attr.attr.mode = 0444; - status = sysfs_create_file(&device->dev.kobj, &fps->dev_attr.attr); - if (status) { - int j; - - for (j = 0; j < i; ++j) - sysfs_remove_file(&device->dev.kobj, &fan->fps[j].dev_attr.attr); - break; - } - } - err: kfree(obj); return status; @@ -396,6 +318,10 @@ static int acpi_fan_probe(struct platform_device *pdev) if (result) return result; + result = acpi_fan_create_attributes(device); + if (result) + return result; + fan->acpi4 = true; } else { result = acpi_device_update_power(device, NULL); @@ -437,12 +363,8 @@ static int acpi_fan_probe(struct platform_device *pdev) return 0; err_end: - if (fan->acpi4) { - int i; - - for (i = 0; i < 
fan->fps_count; ++i) - sysfs_remove_file(&device->dev.kobj, &fan->fps[i].dev_attr.attr); - } + if (fan->acpi4) + acpi_fan_delete_attributes(device); return result; } @@ -453,10 +375,8 @@ static int acpi_fan_remove(struct platform_device *pdev) if (fan->acpi4) { struct acpi_device *device = ACPI_COMPANION(&pdev->dev); - int i; - for (i = 0; i < fan->fps_count; ++i) - sysfs_remove_file(&device->dev.kobj, &fan->fps[i].dev_attr.attr); + acpi_fan_delete_attributes(device); } sysfs_remove_link(&pdev->dev.kobj, "thermal_cooling"); sysfs_remove_link(&fan->cdev->device.kobj, "device"); -- GitLab From d445571fa369cf08148dcd9bce563d5fae14fcd7 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Fri, 11 Feb 2022 08:09:29 -0800 Subject: [PATCH 0667/1586] ACPI: fan: Optimize struct acpi_fan_fif We don't need u64 to store the information about _FIF. There are two booleans (fine_grain_ctrl and low_speed_notification) and one field step_size which can take value from 1-9. There are no internal users of revision field. So convert all fields to u8, by not directly extracting the _FIF info the struct. Use an intermediate buffer to extract and assign. This will help to do u32 math using these fields. No functional changes are expected. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/fan.h | 8 ++++---- drivers/acpi/fan_core.c | 8 +++++++- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/fan.h b/drivers/acpi/fan.h index 36c5e1a570944..6cbb4b028da03 100644 --- a/drivers/acpi/fan.h +++ b/drivers/acpi/fan.h @@ -30,10 +30,10 @@ struct acpi_fan_fps { }; struct acpi_fan_fif { - u64 revision; - u64 fine_grain_ctrl; - u64 step_size; - u64 low_speed_notification; + u8 revision; + u8 fine_grain_ctrl; + u8 step_size; + u8 low_speed_notification; }; struct acpi_fan { diff --git a/drivers/acpi/fan_core.c b/drivers/acpi/fan_core.c index 9f8e68403fad6..484cee0fb13e2 100644 --- a/drivers/acpi/fan_core.c +++ b/drivers/acpi/fan_core.c @@ -211,7 +211,8 @@ static int acpi_fan_get_fif(struct acpi_device *device) struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; struct acpi_fan *fan = acpi_driver_data(device); struct acpi_buffer format = { sizeof("NNNN"), "NNNN" }; - struct acpi_buffer fif = { sizeof(fan->fif), &fan->fif }; + u64 fields[4]; + struct acpi_buffer fif = { sizeof(fields), fields }; union acpi_object *obj; acpi_status status; @@ -232,6 +233,11 @@ static int acpi_fan_get_fif(struct acpi_device *device) status = -EINVAL; } + fan->fif.revision = fields[0]; + fan->fif.fine_grain_ctrl = fields[1]; + fan->fif.step_size = fields[2]; + fan->fif.low_speed_notification = fields[3]; + err: kfree(obj); return status; -- GitLab From bea2d9868ef553e376480de3cd84a7a06fb03e41 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Fri, 11 Feb 2022 15:27:42 -0800 Subject: [PATCH 0668/1586] ACPI: fan: Properly handle fine grain control When _FIF object specifies support for fine grain control, then fan speed can be set from 0 to 100% with the recommended minimum "step size" via _FSL object. Here the control value doesn't need to match any value from _FPS object. 
Currently we have a simple solution implemented which just picks the maximum control value from _FPS to display the actual state, but this is not optimal when there is a big window between two control values in _FPS. Also, there is no way to set a speed which doesn't match one of the control values in _FPS. The system firmware can start the fan at a speed which doesn't match any control value. To support fine grain control (when supported) via thermal sysfs: - cooling device max state is not _FPS state count but it will be 100 / _FIF.step_size Step size can be from 1 to 9. - cooling device current state is _FST.control / _FIF.step_size - cooling device set state will set the control value cdev.curr_state * _FIF.step_size plus any adjustment for 100%. By the spec, when the control values do not sum to 100% because of _FIF.step_size, OSPM may select an appropriate ending Level increment to reach 100%. There is no rounding during calculation. For example, if step size is 6: thermal sysfs cooling device max_state = 100/6 = 16 So the user can set any value from 0-16. If the system boots with a _FST.control which is not a multiple of step_size, the thermal sysfs cur_state will be based on the range. For example for step size = 6: _FST.control thermal sysfs cur_state ------------------------------------------------ 0-5 0 6-11 1 .. .. 90-95 15 96-100 16 While setting the _FST.control, the compensation will be at the last step for cur_state = 16, which will set the _FST.control to 100. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J.
Wysocki --- drivers/acpi/fan.h | 6 +++ drivers/acpi/fan_core.c | 94 +++++++++++++++++++++++++++++------------ 2 files changed, 74 insertions(+), 26 deletions(-) diff --git a/drivers/acpi/fan.h b/drivers/acpi/fan.h index 6cbb4b028da03..4c01be2e3b779 100644 --- a/drivers/acpi/fan.h +++ b/drivers/acpi/fan.h @@ -36,6 +36,12 @@ struct acpi_fan_fif { u8 low_speed_notification; }; +struct acpi_fan_fst { + u64 revision; + u64 control; + u64 speed; +}; + struct acpi_fan { bool acpi4; struct acpi_fan_fif fif; diff --git a/drivers/acpi/fan_core.c b/drivers/acpi/fan_core.c index 484cee0fb13e2..01616ec2e9ac0 100644 --- a/drivers/acpi/fan_core.c +++ b/drivers/acpi/fan_core.c @@ -63,20 +63,24 @@ static int fan_get_max_state(struct thermal_cooling_device *cdev, unsigned long struct acpi_device *device = cdev->devdata; struct acpi_fan *fan = acpi_driver_data(device); - if (fan->acpi4) - *state = fan->fps_count - 1; - else + if (fan->acpi4) { + if (fan->fif.fine_grain_ctrl) + *state = 100 / fan->fif.step_size; + else + *state = fan->fps_count - 1; + } else { *state = 1; + } + return 0; } -static int fan_get_state_acpi4(struct acpi_device *device, unsigned long *state) +static int acpi_fan_get_fst(struct acpi_device *device, struct acpi_fan_fst *fst) { struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; - struct acpi_fan *fan = acpi_driver_data(device); union acpi_object *obj; acpi_status status; - int control, i; + int ret = 0; status = acpi_evaluate_object(device->handle, "_FST", NULL, &buffer); if (ACPI_FAILURE(status)) { @@ -89,35 +93,52 @@ static int fan_get_state_acpi4(struct acpi_device *device, unsigned long *state) obj->package.count != 3 || obj->package.elements[1].type != ACPI_TYPE_INTEGER) { dev_err(&device->dev, "Invalid _FST data\n"); - status = -EINVAL; + ret = -EINVAL; goto err; } - control = obj->package.elements[1].integer.value; + fst->revision = obj->package.elements[0].integer.value; + fst->control = obj->package.elements[1].integer.value; + fst->speed = 
obj->package.elements[2].integer.value; + +err: + kfree(obj); + return ret; +} + +static int fan_get_state_acpi4(struct acpi_device *device, unsigned long *state) +{ + struct acpi_fan *fan = acpi_driver_data(device); + struct acpi_fan_fst fst; + int status, i; + + status = acpi_fan_get_fst(device, &fst); + if (status) + return status; + + if (fan->fif.fine_grain_ctrl) { + /* This control should be same what we set using _FSL by spec */ + if (fst.control > 100) { + dev_dbg(&device->dev, "Invalid control value returned\n"); + goto match_fps; + } + + *state = (int) fst.control / fan->fif.step_size; + return 0; + } + +match_fps: for (i = 0; i < fan->fps_count; i++) { - /* - * When Fine Grain Control is set, return the state - * corresponding to maximum fan->fps[i].control - * value compared to the current speed. Here the - * fan->fps[] is sorted array with increasing speed. - */ - if (fan->fif.fine_grain_ctrl && control < fan->fps[i].control) { - i = (i > 0) ? i - 1 : 0; - break; - } else if (control == fan->fps[i].control) { + if (fst.control == fan->fps[i].control) break; - } } if (i == fan->fps_count) { dev_dbg(&device->dev, "Invalid control value returned\n"); - status = -EINVAL; - goto err; + return -EINVAL; } *state = i; -err: - kfree(obj); return status; } @@ -161,12 +182,27 @@ static int fan_set_state_acpi4(struct acpi_device *device, unsigned long state) { struct acpi_fan *fan = acpi_driver_data(device); acpi_status status; + u64 value = state; + int max_state; - if (state >= fan->fps_count) + if (fan->fif.fine_grain_ctrl) + max_state = 100 / fan->fif.step_size; + else + max_state = fan->fps_count - 1; + + if (state > max_state) return -EINVAL; - status = acpi_execute_simple_method(device->handle, "_FSL", - fan->fps[state].control); + if (fan->fif.fine_grain_ctrl) { + value *= fan->fif.step_size; + /* Spec allows compensate the last step only */ + if (value + fan->fif.step_size > 100) + value = 100; + } else { + value = fan->fps[state].control; + } + + status 
= acpi_execute_simple_method(device->handle, "_FSL", value); if (ACPI_FAILURE(status)) { dev_dbg(&device->dev, "Failed to set state by _FSL\n"); return -ENODEV; @@ -238,6 +274,12 @@ static int acpi_fan_get_fif(struct acpi_device *device) fan->fif.step_size = fields[2]; fan->fif.low_speed_notification = fields[3]; + /* If there is a bug in step size and set as 0, change to 1 */ + if (!fan->fif.step_size) + fan->fif.step_size = 1; + /* If step size > 9, change to 9 (by spec valid values 1-9) */ + else if (fan->fif.step_size > 9) + fan->fif.step_size = 9; err: kfree(obj); return status; -- GitLab From f1197343f07749035d74c08cf8b546c4f95614ab Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Fri, 11 Feb 2022 08:09:31 -0800 Subject: [PATCH 0669/1586] ACPI: fan: Add additional attributes for fine grain control Add additional attributes, which helps in implementing algorithm in the user space to optimize fan control. These attributes are presented in the same directory as the existing performance state attributes. Additional attributes: 1. Support of fine grain control Publish support of presence of fine grain control so that fan speed can be tuned correctly. This attribute is called "fine_grain_control". 2. fan speed Publish the actual fan rpm in sysfs. Knowing fan rpm is helpful to reduce noise level and use passive control instead. Also fan performance may not be same over time, so the same control value may not be enough to run the fan at a speed. So a feedback value of speed is helpful. This sysfs attribute is called "fan_speed_rpm". Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/fan.h | 3 +++ drivers/acpi/fan_attr.c | 55 +++++++++++++++++++++++++++++++++++++++-- drivers/acpi/fan_core.c | 2 +- 3 files changed, 57 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/fan.h b/drivers/acpi/fan.h index 4c01be2e3b779..44728529a5b6b 100644 --- a/drivers/acpi/fan.h +++ b/drivers/acpi/fan.h @@ -48,8 +48,11 @@ struct acpi_fan { struct acpi_fan_fps *fps; int fps_count; struct thermal_cooling_device *cdev; + struct device_attribute fst_speed; + struct device_attribute fine_grain_control; }; +int acpi_fan_get_fst(struct acpi_device *device, struct acpi_fan_fst *fst); int acpi_fan_create_attributes(struct acpi_device *device); void acpi_fan_delete_attributes(struct acpi_device *device); #endif diff --git a/drivers/acpi/fan_attr.c b/drivers/acpi/fan_attr.c index 7b109022108b4..f15157d40713e 100644 --- a/drivers/acpi/fan_attr.c +++ b/drivers/acpi/fan_attr.c @@ -49,10 +49,50 @@ static ssize_t show_state(struct device *dev, struct device_attribute *attr, cha return count; } +static ssize_t show_fan_speed(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct acpi_device *acpi_dev = container_of(dev, struct acpi_device, dev); + struct acpi_fan_fst fst; + int status; + + status = acpi_fan_get_fst(acpi_dev, &fst); + if (status) + return status; + + return sprintf(buf, "%lld\n", fst.speed); +} + +static ssize_t show_fine_grain_control(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct acpi_device *acpi_dev = container_of(dev, struct acpi_device, dev); + struct acpi_fan *fan = acpi_driver_data(acpi_dev); + + return sprintf(buf, "%d\n", fan->fif.fine_grain_ctrl); +} + int acpi_fan_create_attributes(struct acpi_device *device) { struct acpi_fan *fan = acpi_driver_data(device); - int i, status = 0; + int i, status; + + sysfs_attr_init(&fan->fine_grain_control.attr); + fan->fine_grain_control.show = show_fine_grain_control; + fan->fine_grain_control.store = NULL; + fan->fine_grain_control.attr.name 
= "fine_grain_control"; + fan->fine_grain_control.attr.mode = 0444; + status = sysfs_create_file(&device->dev.kobj, &fan->fine_grain_control.attr); + if (status) + return status; + + /* _FST is present if we are here */ + sysfs_attr_init(&fan->fst_speed.attr); + fan->fst_speed.show = show_fan_speed; + fan->fst_speed.store = NULL; + fan->fst_speed.attr.name = "fan_speed_rpm"; + fan->fst_speed.attr.mode = 0444; + status = sysfs_create_file(&device->dev.kobj, &fan->fst_speed.attr); + if (status) + goto rem_fine_grain_attr; for (i = 0; i < fan->fps_count; ++i) { struct acpi_fan_fps *fps = &fan->fps[i]; @@ -69,10 +109,18 @@ int acpi_fan_create_attributes(struct acpi_device *device) for (j = 0; j < i; ++j) sysfs_remove_file(&device->dev.kobj, &fan->fps[j].dev_attr.attr); - break; + goto rem_fst_attr; } } + return 0; + +rem_fst_attr: + sysfs_remove_file(&device->dev.kobj, &fan->fst_speed.attr); + +rem_fine_grain_attr: + sysfs_remove_file(&device->dev.kobj, &fan->fine_grain_control.attr); + return status; } @@ -83,4 +131,7 @@ void acpi_fan_delete_attributes(struct acpi_device *device) for (i = 0; i < fan->fps_count; ++i) sysfs_remove_file(&device->dev.kobj, &fan->fps[i].dev_attr.attr); + + sysfs_remove_file(&device->dev.kobj, &fan->fst_speed.attr); + sysfs_remove_file(&device->dev.kobj, &fan->fine_grain_control.attr); } diff --git a/drivers/acpi/fan_core.c b/drivers/acpi/fan_core.c index 01616ec2e9ac0..b9a9a59ddcc10 100644 --- a/drivers/acpi/fan_core.c +++ b/drivers/acpi/fan_core.c @@ -75,7 +75,7 @@ static int fan_get_max_state(struct thermal_cooling_device *cdev, unsigned long return 0; } -static int acpi_fan_get_fst(struct acpi_device *device, struct acpi_fan_fst *fst) +int acpi_fan_get_fst(struct acpi_device *device, struct acpi_fan_fst *fst) { struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; union acpi_object *obj; -- GitLab From 0750b8fcf313845b21c71344b4bea8ad7d3cee84 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Fri, 11 Feb 2022 08:09:32 -0800 
Subject: [PATCH 0670/1586] Documentation/admin-guide/acpi: Add documentation for fine grain control Add documentation for the newly added attributes: fine_grain_control fan_speed_rpm Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- .../acpi/fan_performance_states.rst | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/Documentation/admin-guide/acpi/fan_performance_states.rst b/Documentation/admin-guide/acpi/fan_performance_states.rst index 98fe5c3331214..b9e4b4d146c1e 100644 --- a/Documentation/admin-guide/acpi/fan_performance_states.rst +++ b/Documentation/admin-guide/acpi/fan_performance_states.rst @@ -60,3 +60,31 @@ For example:: When a given field is not populated or its value provided by the platform firmware is invalid, the "not-defined" string is shown instead of the value. + +ACPI Fan Fine Grain Control +============================= + +When _FIF object specifies support for fine grain control, then fan speed +can be set from 0 to 100% with the recommended minimum "step size" via +_FSL object. User can adjust fan speed using thermal sysfs cooling device. + +Here use can look at fan performance states for a reference speed (speed_rpm) +and set it by changing cooling device cur_state. If the fine grain control +is supported then user can also adjust to some other speeds which are +not defined in the performance states. + +The support of fine grain control is presented via sysfs attribute +"fine_grain_control". If fine grain control is present, this attribute +will show "1" otherwise "0". + +This sysfs attribute is presented in the same directory as performance states. + +ACPI Fan Performance Feedback +============================= + +The optional _FST object provides status information for the fan device. +This includes field to provide current fan speed in revolutions per minute +at which the fan is rotating. 
+ +This speed is presented in the sysfs using the attribute "fan_speed_rpm", +in the same directory as performance states. -- GitLab From 7dacee0b9efc8bd061f097b1a8d4daa6591af0c6 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Sun, 13 Feb 2022 16:49:20 +0100 Subject: [PATCH 0671/1586] ACPI: battery: Add device HID and quirk for Microsoft Surface Go 3 For some reason, the Microsoft Surface Go 3 uses the standard ACPI interface for battery information, but does not use the standard PNP0C0A HID. Instead it uses MSHW0146 as identifier. Add that ID to the driver as this seems to work well. Additionally, the power state is not updated immediately after the AC has been (un-)plugged, so add the respective quirk for that. Signed-off-by: Maximilian Luz Cc: All applicable Signed-off-by: Rafael J. Wysocki --- drivers/acpi/battery.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index ea31ae01458b4..dc208f5f5a1f7 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -59,6 +59,10 @@ MODULE_PARM_DESC(cache_time, "cache time in milliseconds"); static const struct acpi_device_id battery_device_ids[] = { {"PNP0C0A", 0}, + + /* Microsoft Surface Go 3 */ + {"MSHW0146", 0}, + {"", 0}, }; @@ -1148,6 +1152,14 @@ static const struct dmi_system_id bat_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad"), }, }, + { + /* Microsoft Surface Go 3 */ + .callback = battery_notification_delay_quirk, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), + DMI_MATCH(DMI_PRODUCT_NAME, "Surface Go 3"), + }, + }, {}, }; -- GitLab From 65881e1db4e948614d9eb195b8e1197339822949 Mon Sep 17 00:00:00 2001 From: Richard Haines Date: Fri, 25 Feb 2022 17:54:38 +0000 Subject: [PATCH 0672/1586] selinux: allow FIOCLEX and FIONCLEX with policy capability These ioctls are equivalent to fcntl(fd, F_SETFD, flags), which SELinux always allows too. 
Furthermore, a failed FIOCLEX could result in a file descriptor being leaked to a process that should not have access to it. As this patch removes access controls, a policy capability needs to be enabled in policy to always allow these ioctls. Based-on-patch-by: Demi Marie Obenour Signed-off-by: Richard Haines [PM: subject line tweak] Signed-off-by: Paul Moore --- security/selinux/hooks.c | 6 ++++++ security/selinux/include/policycap.h | 1 + security/selinux/include/policycap_names.h | 3 ++- security/selinux/include/security.h | 7 +++++++ 4 files changed, 16 insertions(+), 1 deletion(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 1e69f88eb326a..b12e14b2797b9 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3682,6 +3682,12 @@ static int selinux_file_ioctl(struct file *file, unsigned int cmd, CAP_OPT_NONE, true); break; + case FIOCLEX: + case FIONCLEX: + if (!selinux_policycap_ioctl_skip_cloexec()) + error = ioctl_has_perm(cred, file, FILE__IOCTL, (u16) cmd); + break; + /* default case assumes that the command will go * to the file's ioctl() function. 
*/ diff --git a/security/selinux/include/policycap.h b/security/selinux/include/policycap.h index 2ec038efbb03c..a9e572ca4fd96 100644 --- a/security/selinux/include/policycap.h +++ b/security/selinux/include/policycap.h @@ -11,6 +11,7 @@ enum { POLICYDB_CAPABILITY_CGROUPSECLABEL, POLICYDB_CAPABILITY_NNP_NOSUID_TRANSITION, POLICYDB_CAPABILITY_GENFS_SECLABEL_SYMLINKS, + POLICYDB_CAPABILITY_IOCTL_SKIP_CLOEXEC, __POLICYDB_CAPABILITY_MAX }; #define POLICYDB_CAPABILITY_MAX (__POLICYDB_CAPABILITY_MAX - 1) diff --git a/security/selinux/include/policycap_names.h b/security/selinux/include/policycap_names.h index b89289f092c93..ebd64afe1defd 100644 --- a/security/selinux/include/policycap_names.h +++ b/security/selinux/include/policycap_names.h @@ -12,7 +12,8 @@ const char *selinux_policycap_names[__POLICYDB_CAPABILITY_MAX] = { "always_check_network", "cgroup_seclabel", "nnp_nosuid_transition", - "genfs_seclabel_symlinks" + "genfs_seclabel_symlinks", + "ioctl_skip_cloexec" }; #endif /* _SELINUX_POLICYCAP_NAMES_H_ */ diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index d91a5672de991..84f42fa8012f1 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -219,6 +219,13 @@ static inline bool selinux_policycap_genfs_seclabel_symlinks(void) return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_GENFS_SECLABEL_SYMLINKS]); } +static inline bool selinux_policycap_ioctl_skip_cloexec(void) +{ + struct selinux_state *state = &selinux_state; + + return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_IOCTL_SKIP_CLOEXEC]); +} + struct selinux_policy_convert_data; struct selinux_load_state { -- GitLab From 2792d84e6da5e0fd7d3b22fd70bc69b7ee263609 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 16 Feb 2022 12:05:28 -0800 Subject: [PATCH 0673/1586] usercopy: Check valid lifetime via stack depth One of the things that CONFIG_HARDENED_USERCOPY sanity-checks is whether an object that is about to be copied to/from 
userspace is overlapping the stack at all. If it is, it performs a number of inexpensive bounds checks. One of the finer-grained checks is whether an object crosses stack frames within the stack region. Doing this on x86 with CONFIG_FRAME_POINTER was cheap/easy. Doing it with ORC was deemed too heavy, and was left out (a while ago), leaving the coarser whole-stack check. The LKDTM tests USERCOPY_STACK_FRAME_TO and USERCOPY_STACK_FRAME_FROM try to exercise these cross-frame cases to validate the defense is working. They have been failing ever since ORC was added (which was expected). While Muhammad was investigating various LKDTM failures[1], he asked me for additional details on them, and I realized that when exact stack frame boundary checking is not available (i.e. everything except x86 with FRAME_POINTER), it could check if a stack object is at least "current depth valid", in the sense that any object within the stack region but not between start-of-stack and current_stack_pointer should be considered unavailable (i.e. its lifetime is from a call no longer present on the stack). Introduce ARCH_HAS_CURRENT_STACK_POINTER to track which architectures have actually implemented the common global register alias. Additionally report usercopy bounds checking failures with an offset from current_stack_pointer, which may assist with diagnosing failures. The LKDTM USERCOPY_STACK_FRAME_TO and USERCOPY_STACK_FRAME_FROM tests (once slightly adjusted in a separate patch) pass again with this fixed.
[1] https://github.com/kernelci/kernelci-project/issues/84 Cc: Matthew Wilcox (Oracle) Cc: Josh Poimboeuf Cc: Andrew Morton Cc: linux-mm@kvack.org Reported-by: Muhammad Usama Anjum Signed-off-by: Kees Cook --- v1: https://lore.kernel.org/lkml/20220216201449.2087956-1-keescook@chromium.org v2: https://lore.kernel.org/lkml/20220224060342.1855457-1-keescook@chromium.org v3: https://lore.kernel.org/lkml/20220225173345.3358109-1-keescook@chromium.org v4: - improve commit log (akpm) --- arch/arm/Kconfig | 1 + arch/arm64/Kconfig | 1 + arch/powerpc/Kconfig | 1 + arch/s390/Kconfig | 1 + arch/sh/Kconfig | 1 + arch/x86/Kconfig | 1 + mm/Kconfig | 9 +++++++++ mm/usercopy.c | 23 +++++++++++++++++++++-- 8 files changed, 36 insertions(+), 2 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 4c97cb40eebb6..a7a09eef18521 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -5,6 +5,7 @@ config ARM select ARCH_32BIT_OFF_T select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE if HAVE_KRETPROBES && FRAME_POINTER && !ARM_UNWIND select ARCH_HAS_BINFMT_FLAT + select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DMA_WRITE_COMBINE if !ARM_DMA_MEM_BUFFERABLE select ARCH_HAS_ELF_RANDOMIZE diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index f2b5a4abef215..b8ab790555c81 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -18,6 +18,7 @@ config ARM64 select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2 select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_HAS_CACHE_LINE_SIZE + select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DMA_PREP_COHERENT diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index b779603978e10..7e7387bd7d538 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -108,6 +108,7 @@ config PPC select ARCH_ENABLE_MEMORY_HOTPLUG select ARCH_ENABLE_MEMORY_HOTREMOVE select ARCH_HAS_COPY_MC if PPC64 + select 
ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEBUG_WX if STRICT_KERNEL_RWX diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index be9f39fd06df6..4845ab549dd1d 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -60,6 +60,7 @@ config S390 select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM select ARCH_ENABLE_MEMORY_HOTREMOVE select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2 + select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEBUG_WX select ARCH_HAS_DEVMEM_IS_ALLOWED diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 2474a04ceac43..1c2b53bf30930 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -7,6 +7,7 @@ config SUPERH select ARCH_HAVE_CUSTOM_GPIO_H select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A) select ARCH_HAS_BINFMT_FLAT if !MMU + select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_GIGANTIC_PAGE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_PTE_SPECIAL diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9f5bd41bf660c..90494fba3620f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -69,6 +69,7 @@ config X86 select ARCH_ENABLE_THP_MIGRATION if X86_64 && TRANSPARENT_HUGEPAGE select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_CACHE_LINE_SIZE + select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE if !X86_PAE select ARCH_HAS_DEVMEM_IS_ALLOWED diff --git a/mm/Kconfig b/mm/Kconfig index 3326ee3903f33..c349599601f82 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -744,6 +744,15 @@ config IDLE_PAGE_TRACKING config ARCH_HAS_CACHE_LINE_SIZE bool +config ARCH_HAS_CURRENT_STACK_POINTER + bool + help + In support of HARDENED_USERCOPY performing stack variable lifetime + checking, an architecture-agnostic way to find the stack pointer + is needed. Once an architecture defines an unsigned long global + register alias named "current_stack_pointer", this config can be + selected. 
+ config ARCH_HAS_PTE_DEVMAP bool diff --git a/mm/usercopy.c b/mm/usercopy.c index d0d268135d96d..5d34c40c16c22 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -29,7 +29,7 @@ * Returns: * NOT_STACK: not at all on the stack * GOOD_FRAME: fully within a valid stack frame - * GOOD_STACK: fully on the stack (when can't do frame-checking) + * GOOD_STACK: within the current stack (when can't frame-check exactly) * BAD_STACK: error condition (invalid stack position or bad stack frame) */ static noinline int check_stack_object(const void *obj, unsigned long len) @@ -55,6 +55,17 @@ static noinline int check_stack_object(const void *obj, unsigned long len) if (ret) return ret; + /* Finally, check stack depth if possible. */ +#ifdef CONFIG_ARCH_HAS_CURRENT_STACK_POINTER + if (IS_ENABLED(CONFIG_STACK_GROWSUP)) { + if ((void *)current_stack_pointer < obj + len) + return BAD_STACK; + } else { + if (obj < (void *)current_stack_pointer) + return BAD_STACK; + } +#endif + return GOOD_STACK; } @@ -280,7 +291,15 @@ void __check_object_size(const void *ptr, unsigned long n, bool to_user) */ return; default: - usercopy_abort("process stack", NULL, to_user, 0, n); + usercopy_abort("process stack", NULL, to_user, +#ifdef CONFIG_ARCH_HAS_CURRENT_STACK_POINTER + IS_ENABLED(CONFIG_STACK_GROWSUP) ? + ptr - (void *)current_stack_pointer : + (void *)current_stack_pointer - ptr, +#else + 0, +#endif + n); } /* Check for bad heap object. */ -- GitLab From 92652cf986441b18282db6e5bd82afc74e8ed5e9 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 19 Feb 2022 17:26:00 -0800 Subject: [PATCH 0674/1586] xtensa: Implement "current_stack_pointer" To follow the existing per-arch conventions replace open-coded uses of asm "sp" as "current_stack_pointer". This will let it be used in non-arch places (like HARDENED_USERCOPY). 
Cc: Chris Zankel Cc: Marc Zyngier Cc: linux-xtensa@linux-xtensa.org Signed-off-by: Kees Cook Acked-by: Max Filippov Link: https://lore.kernel.org/lkml/CAMo8BfJFJE-n3=AF+pb9_6oF3gzxX7a+7aBrASHjjNX5byqDqw@mail.gmail.com --- arch/xtensa/Kconfig | 1 + arch/xtensa/include/asm/current.h | 2 ++ arch/xtensa/include/asm/stacktrace.h | 8 ++++---- arch/xtensa/kernel/irq.c | 3 +-- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 8ac599aa6d994..8874323276131 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -3,6 +3,7 @@ config XTENSA def_bool y select ARCH_32BIT_OFF_T select ARCH_HAS_BINFMT_FLAT if !MMU + select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DMA_PREP_COHERENT if MMU select ARCH_HAS_SYNC_DMA_FOR_CPU if MMU select ARCH_HAS_SYNC_DMA_FOR_DEVICE if MMU diff --git a/arch/xtensa/include/asm/current.h b/arch/xtensa/include/asm/current.h index 5d98a7ad4251b..08010dbf5e09a 100644 --- a/arch/xtensa/include/asm/current.h +++ b/arch/xtensa/include/asm/current.h @@ -26,6 +26,8 @@ static inline struct task_struct *get_current(void) #define current get_current() +register unsigned long current_stack_pointer __asm__("a1"); + #else #define GET_CURRENT(reg,sp) \ diff --git a/arch/xtensa/include/asm/stacktrace.h b/arch/xtensa/include/asm/stacktrace.h index fe06e8ed162b7..a85e785a62888 100644 --- a/arch/xtensa/include/asm/stacktrace.h +++ b/arch/xtensa/include/asm/stacktrace.h @@ -19,14 +19,14 @@ struct stackframe { static __always_inline unsigned long *stack_pointer(struct task_struct *task) { - unsigned long *sp; + unsigned long sp; if (!task || task == current) - __asm__ __volatile__ ("mov %0, a1\n" : "=a"(sp)); + sp = current_stack_pointer; else - sp = (unsigned long *)task->thread.sp; + sp = task->thread.sp; - return sp; + return (unsigned long *)sp; } void walk_stackframe(unsigned long *sp, diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c index 15051a8a15399..529fe92458219 100644 --- 
a/arch/xtensa/kernel/irq.c +++ b/arch/xtensa/kernel/irq.c @@ -36,9 +36,8 @@ asmlinkage void do_IRQ(int hwirq, struct pt_regs *regs) #ifdef CONFIG_DEBUG_STACKOVERFLOW /* Debugging check for stack overflow: is there less than 1KB free? */ { - unsigned long sp; + unsigned long sp = current_stack_pointer; - __asm__ __volatile__ ("mov %0, a1\n" : "=a" (sp)); sp &= THREAD_SIZE - 1; if (unlikely(sp < (sizeof(thread_info) + 1024))) -- GitLab From e3952fcce1aad934f1322843b564ff86256444b2 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Tue, 1 Feb 2022 14:13:45 +0100 Subject: [PATCH 0675/1586] ext4: fix remount with 'abort' option After commit 6e47a3cc68fc ("ext4: get rid of super block and sbi from handle_mount_ops()") the 'abort' options stopped working. This is because we're using ctx_set_mount_flags() helper that's expecting an argument with the appropriate bit set, but instead got EXT4_MF_FS_ABORTED which is a bit position. ext4_set_mount_flag() is using set_bit() while ctx_set_mount_flags() was using bitwise OR. Create a separate helper ctx_set_mount_flag() to handle setting the mount_flags correctly. While we're at it clean up the EXT4_SET_CTX macros so that we're only creating helpers that we actually use to avoid warnings. 
Fixes: 6e47a3cc68fc ("ext4: get rid of super block and sbi from handle_mount_ops()") Signed-off-by: Lukas Czerner Cc: Ye Bin Reviewed-by: Eric Sandeen Tested-by: Gabriel Krisman Bertazi Link: https://lore.kernel.org/r/20220201131345.77591-1-lczerner@redhat.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c5021ca0a28ad..1fe91a26b88c7 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2045,8 +2045,8 @@ struct ext4_fs_context { unsigned int mask_s_mount_opt; unsigned int vals_s_mount_opt2; unsigned int mask_s_mount_opt2; - unsigned int vals_s_mount_flags; - unsigned int mask_s_mount_flags; + unsigned long vals_s_mount_flags; + unsigned long mask_s_mount_flags; unsigned int opt_flags; /* MOPT flags */ unsigned int spec; u32 s_max_batch_time; @@ -2149,23 +2149,36 @@ static inline void ctx_set_##name(struct ext4_fs_context *ctx, \ { \ ctx->mask_s_##name |= flag; \ ctx->vals_s_##name |= flag; \ -} \ +} + +#define EXT4_CLEAR_CTX(name) \ static inline void ctx_clear_##name(struct ext4_fs_context *ctx, \ unsigned long flag) \ { \ ctx->mask_s_##name |= flag; \ ctx->vals_s_##name &= ~flag; \ -} \ +} + +#define EXT4_TEST_CTX(name) \ static inline unsigned long \ ctx_test_##name(struct ext4_fs_context *ctx, unsigned long flag) \ { \ return (ctx->vals_s_##name & flag); \ -} \ +} -EXT4_SET_CTX(flags); +EXT4_SET_CTX(flags); /* set only */ EXT4_SET_CTX(mount_opt); +EXT4_CLEAR_CTX(mount_opt); +EXT4_TEST_CTX(mount_opt); EXT4_SET_CTX(mount_opt2); -EXT4_SET_CTX(mount_flags); +EXT4_CLEAR_CTX(mount_opt2); +EXT4_TEST_CTX(mount_opt2); + +static inline void ctx_set_mount_flag(struct ext4_fs_context *ctx, int bit) +{ + set_bit(bit, &ctx->mask_s_mount_flags); + set_bit(bit, &ctx->vals_s_mount_flags); +} static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) { @@ -2235,7 +2248,7 @@ static int ext4_parse_param(struct fs_context 
*fc, struct fs_parameter *param) param->key); return 0; case Opt_abort: - ctx_set_mount_flags(ctx, EXT4_MF_FS_ABORTED); + ctx_set_mount_flag(ctx, EXT4_MF_FS_ABORTED); return 0; case Opt_i_version: ext4_msg(NULL, KERN_WARNING, deprecated_msg, param->key, "5.20"); -- GitLab From cc16eecae687912238ee6efbff71ad31e2bc414e Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Thu, 10 Feb 2022 21:07:11 +0530 Subject: [PATCH 0676/1586] jbd2: fix use-after-free of transaction_t race jbd2_journal_wait_updates() is called with j_state_lock held. But if there is a commit in progress, then this transaction might get committed and freed via jbd2_journal_commit_transaction() -> jbd2_journal_free_transaction(), when we release j_state_lock. So check for journal->j_running_transaction everytime we release and acquire j_state_lock to avoid use-after-free issue. Link: https://lore.kernel.org/r/948c2fed518ae739db6a8f7f83f1d58b504f87d0.1644497105.git.ritesh.list@gmail.com Fixes: 4f98186848707f53 ("jbd2: refactor wait logic for transaction updates into a common function") Cc: stable@kernel.org Reported-and-tested-by: syzbot+afa2ca5171d93e44b348@syzkaller.appspotmail.com Reviewed-by: Jan Kara Signed-off-by: Ritesh Harjani Signed-off-by: Theodore Ts'o --- fs/jbd2/transaction.c | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 8e2f8275a2535..259e00046a8bd 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -842,27 +842,38 @@ EXPORT_SYMBOL(jbd2_journal_restart); */ void jbd2_journal_wait_updates(journal_t *journal) { - transaction_t *commit_transaction = journal->j_running_transaction; + DEFINE_WAIT(wait); - if (!commit_transaction) - return; + while (1) { + /* + * Note that the running transaction can get freed under us if + * this transaction is getting committed in + * jbd2_journal_commit_transaction() -> + * jbd2_journal_free_transaction(). 
This can only happen when we + * release j_state_lock -> schedule() -> acquire j_state_lock. + * Hence we should everytime retrieve new j_running_transaction + * value (after j_state_lock release acquire cycle), else it may + * lead to use-after-free of old freed transaction. + */ + transaction_t *transaction = journal->j_running_transaction; - spin_lock(&commit_transaction->t_handle_lock); - while (atomic_read(&commit_transaction->t_updates)) { - DEFINE_WAIT(wait); + if (!transaction) + break; + spin_lock(&transaction->t_handle_lock); prepare_to_wait(&journal->j_wait_updates, &wait, - TASK_UNINTERRUPTIBLE); - if (atomic_read(&commit_transaction->t_updates)) { - spin_unlock(&commit_transaction->t_handle_lock); - write_unlock(&journal->j_state_lock); - schedule(); - write_lock(&journal->j_state_lock); - spin_lock(&commit_transaction->t_handle_lock); + TASK_UNINTERRUPTIBLE); + if (!atomic_read(&transaction->t_updates)) { + spin_unlock(&transaction->t_handle_lock); + finish_wait(&journal->j_wait_updates, &wait); + break; } + spin_unlock(&transaction->t_handle_lock); + write_unlock(&journal->j_state_lock); + schedule(); finish_wait(&journal->j_wait_updates, &wait); + write_lock(&journal->j_state_lock); } - spin_unlock(&commit_transaction->t_handle_lock); } /** @@ -877,8 +888,6 @@ void jbd2_journal_wait_updates(journal_t *journal) */ void jbd2_journal_lock_updates(journal_t *journal) { - DEFINE_WAIT(wait); - jbd2_might_wait_for_commit(journal); write_lock(&journal->j_state_lock); -- GitLab From f7f497cb702462e8505ff3d8d4e7722ad95626a1 Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Wed, 16 Feb 2022 12:30:35 +0530 Subject: [PATCH 0677/1586] jbd2: kill t_handle_lock transaction spinlock This patch kills t_handle_lock transaction spinlock completely from jbd2. To explain the reasoning, currently there were three sites at which this spinlock was used. 1. jbd2_journal_wait_updates() a. Based on careful code review it can be seen that, we don't need this lock here. 
This is because we wait for any currently ongoing updates based on an atomic variable t_updates. And we anyway don't take any t_handle_lock while in stop_this_handle(). i.e. write_lock(&journal->j_state_lock() jbd2_journal_wait_updates() stop_this_handle() while (atomic_read(txn->t_updates) { | DEFINE_WAIT(wait); | prepare_to_wait(); | if (atomic_read(txn->t_updates) if (atomic_dec_and_test(txn->t_updates)) write_unlock(&journal->j_state_lock); schedule(); wake_up() write_lock(&journal->j_state_lock); finish_wait(); } txn->t_state = T_COMMIT write_unlock(&journal->j_state_lock); b. Also note that between atomic_inc(&txn->t_updates) in start_this_handle() and jbd2_journal_wait_updates(), the synchronization happens via read_lock(journal->j_state_lock) in start_this_handle(); 2. jbd2_journal_extend() a. jbd2_journal_extend() is called with the handle of each process from task_struct. So no lock required in updating member fields of handle_t b. For member fields of h_transaction, all updates happen only via atomic APIs (which is also within read_lock()). So, no need of this transaction spinlock. 3. update_t_max_wait() Based on Jan's suggestion, this can be carefully removed using atomic cmpxchg API. Note that there can be several processes which are waiting for a new transaction to be allocated and started. For doing this only one process will succeed in taking write_lock() and allocating a new txn. After that all of the processes will be updating the t_max_wait (max transaction wait time). This can be done via below method w/o taking any locks using atomic cmpxchg.
For more details refer [1] new = get_new_val(); old = READ_ONCE(ptr->max_val); while (old < new) old = cmpxchg(&ptr->max_val, old, new); [1]: https://lwn.net/Articles/849237/ Suggested-by: Jan Kara Signed-off-by: Ritesh Harjani Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/d89e599658b4a1f3893a48c6feded200073037fc.1644992076.git.riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o --- fs/jbd2/transaction.c | 28 +++++++++------------------- include/linux/jbd2.h | 3 --- 2 files changed, 9 insertions(+), 22 deletions(-) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 259e00046a8bd..83801a8be078e 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -107,7 +107,6 @@ static void jbd2_get_transaction(journal_t *journal, transaction->t_start_time = ktime_get(); transaction->t_tid = journal->j_transaction_sequence++; transaction->t_expires = jiffies + journal->j_commit_interval; - spin_lock_init(&transaction->t_handle_lock); atomic_set(&transaction->t_updates, 0); atomic_set(&transaction->t_outstanding_credits, jbd2_descriptor_blocks_per_trans(journal) + @@ -139,24 +138,21 @@ static void jbd2_get_transaction(journal_t *journal, /* * Update transaction's maximum wait time, if debugging is enabled. * - * In order for t_max_wait to be reliable, it must be protected by a - * lock. But doing so will mean that start_this_handle() can not be - * run in parallel on SMP systems, which limits our scalability. So - * unless debugging is enabled, we no longer update t_max_wait, which - * means that maximum wait time reported by the jbd2_run_stats - * tracepoint will always be zero. + * t_max_wait is carefully updated here with use of atomic compare exchange. + * Note that there could be multiplre threads trying to do this simultaneously + * hence using cmpxchg to avoid any use of locks in this case. 
*/ static inline void update_t_max_wait(transaction_t *transaction, unsigned long ts) { #ifdef CONFIG_JBD2_DEBUG + unsigned long oldts, newts; if (jbd2_journal_enable_debug && time_after(transaction->t_start, ts)) { - ts = jbd2_time_diff(ts, transaction->t_start); - spin_lock(&transaction->t_handle_lock); - if (ts > transaction->t_max_wait) - transaction->t_max_wait = ts; - spin_unlock(&transaction->t_handle_lock); + newts = jbd2_time_diff(ts, transaction->t_start); + oldts = READ_ONCE(transaction->t_max_wait); + while (oldts < newts) + oldts = cmpxchg(&transaction->t_max_wait, oldts, newts); } #endif } @@ -690,7 +686,6 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records) DIV_ROUND_UP( handle->h_revoke_credits_requested, journal->j_revoke_records_per_block); - spin_lock(&transaction->t_handle_lock); wanted = atomic_add_return(nblocks, &transaction->t_outstanding_credits); @@ -698,7 +693,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records) jbd_debug(3, "denied handle %p %d blocks: " "transaction too large\n", handle, nblocks); atomic_sub(nblocks, &transaction->t_outstanding_credits); - goto unlock; + goto error_out; } trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev, @@ -714,8 +709,6 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records) result = 0; jbd_debug(3, "extended handle %p by %d\n", handle, nblocks); -unlock: - spin_unlock(&transaction->t_handle_lock); error_out: read_unlock(&journal->j_state_lock); return result; @@ -860,15 +853,12 @@ void jbd2_journal_wait_updates(journal_t *journal) if (!transaction) break; - spin_lock(&transaction->t_handle_lock); prepare_to_wait(&journal->j_wait_updates, &wait, TASK_UNINTERRUPTIBLE); if (!atomic_read(&transaction->t_updates)) { - spin_unlock(&transaction->t_handle_lock); finish_wait(&journal->j_wait_updates, &wait); break; } - spin_unlock(&transaction->t_handle_lock); write_unlock(&journal->j_state_lock); schedule(); 
finish_wait(&journal->j_wait_updates, &wait); diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 9c3ada74ffb18..a787872e1e86d 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -554,9 +554,6 @@ struct transaction_chp_stats_s { * ->j_list_lock * * j_state_lock - * ->t_handle_lock - * - * j_state_lock * ->j_list_lock (journal_unmap_buffer) * */ -- GitLab From 2d4429205882817100e5e88870ce0663d30c77af Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Wed, 16 Feb 2022 12:30:36 +0530 Subject: [PATCH 0678/1586] jbd2: remove CONFIG_JBD2_DEBUG to update t_max_wait CONFIG_JBD2_DEBUG and jbd2_journal_enable_debug knobs were added in update_t_max_wait(), since earlier it used to take a spinlock for updating t_max_wait, which could cause a bottleneck while starting a txn (start_this_handle()). Since in previous patch, we have killed t_handle_lock completely, we could get rid of this debug config and knob to let t_max_wait be updated by default again. Signed-off-by: Ritesh Harjani Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/ad7319a601fd501079310747ce87d908e0944763.1644992076.git.riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o --- fs/jbd2/transaction.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 83801a8be078e..73ed02f061e18 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -141,20 +141,19 @@ static void jbd2_get_transaction(journal_t *journal, * t_max_wait is carefully updated here with use of atomic compare exchange. * Note that there could be multiplre threads trying to do this simultaneously * hence using cmpxchg to avoid any use of locks in this case. + * With this t_max_wait can be updated w/o enabling jbd2_journal_enable_debug. 
*/ static inline void update_t_max_wait(transaction_t *transaction, unsigned long ts) { -#ifdef CONFIG_JBD2_DEBUG unsigned long oldts, newts; - if (jbd2_journal_enable_debug && - time_after(transaction->t_start, ts)) { + + if (time_after(transaction->t_start, ts)) { newts = jbd2_time_diff(ts, transaction->t_start); oldts = READ_ONCE(transaction->t_max_wait); while (oldts < newts) oldts = cmpxchg(&transaction->t_max_wait, oldts, newts); } -#endif } /* -- GitLab From a5c0e2fdf7cea535ba03259894dc184e5a4c2800 Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Wed, 16 Feb 2022 12:32:43 +0530 Subject: [PATCH 0679/1586] ext4: correct cluster len and clusters changed accounting in ext4_mb_mark_bb ext4_mb_mark_bb() currently wrongly calculates cluster len (clen) and flex_group->free_clusters. This patch fixes that. Identified based on code review of ext4_mb_mark_bb() function. Signed-off-by: Ritesh Harjani Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/a0b035d536bafa88110b74456853774b64c8ac40.1644992609.git.riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 67ac95c4cd9b8..b8ffbc0ebe14e 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3899,10 +3899,11 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block, struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_group_t group; ext4_grpblk_t blkoff; - int i, clen, err; + int i, err; int already; + unsigned int clen, clen_changed; - clen = EXT4_B2C(sbi, len); + clen = EXT4_NUM_B2C(sbi, len); ext4_get_group_no_and_offset(sb, block, &group, &blkoff); bitmap_bh = ext4_read_block_bitmap(sb, group); @@ -3923,6 +3924,7 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block, if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) == !state) already++; + clen_changed = clen - already; if (state) ext4_set_bits(bitmap_bh->b_data, blkoff, clen); else @@ -3935,9 
+3937,9 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block, group, gdp)); } if (state) - clen = ext4_free_group_clusters(sb, gdp) - clen + already; + clen = ext4_free_group_clusters(sb, gdp) - clen_changed; else - clen = ext4_free_group_clusters(sb, gdp) + clen - already; + clen = ext4_free_group_clusters(sb, gdp) + clen_changed; ext4_free_group_clusters_set(sb, gdp, clen); ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh); @@ -3947,10 +3949,13 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block, if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, group); + struct flex_groups *fg = sbi_array_rcu_deref(sbi, + s_flex_groups, flex_group); - atomic64_sub(len, - &sbi_array_rcu_deref(sbi, s_flex_groups, - flex_group)->free_clusters); + if (state) + atomic64_sub(clen_changed, &fg->free_clusters); + else + atomic64_add(clen_changed, &fg->free_clusters); } err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh); -- GitLab From bfdc502a4a4c058bf4cbb1df0c297761d528f54d Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Wed, 16 Feb 2022 12:32:44 +0530 Subject: [PATCH 0680/1586] ext4: fix ext4_mb_mark_bb() with flex_bg with fast_commit In case of flex_bg feature (which is by default enabled), extents for any given inode might span across blocks from two different block group. ext4_mb_mark_bb() only reads the buffer_head of block bitmap once for the starting block group, but it fails to read it again when the extent length boundary overflows to another block group. Then in this below loop it accesses memory beyond the block group bitmap buffer_head and results into a data abort. for (i = 0; i < clen; i++) if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) == !state) already++; This patch adds this functionality for checking block group boundary in ext4_mb_mark_bb() and update the buffer_head(bitmap_bh) for every different block group. w/o this patch, I was easily able to hit a data access abort using Power platform. 
<...> [ 74.327662] EXT4-fs error (device loop3): ext4_mb_generate_buddy:1141: group 11, block bitmap and bg descriptor inconsistent: 21248 vs 23294 free clusters [ 74.533214] EXT4-fs (loop3): shut down requested (2) [ 74.536705] Aborting journal on device loop3-8. [ 74.702705] BUG: Unable to handle kernel data access on read at 0xc00000005e980000 [ 74.703727] Faulting instruction address: 0xc0000000007bffb8 cpu 0xd: Vector: 300 (Data Access) at [c000000015db7060] pc: c0000000007bffb8: ext4_mb_mark_bb+0x198/0x5a0 lr: c0000000007bfeec: ext4_mb_mark_bb+0xcc/0x5a0 sp: c000000015db7300 msr: 800000000280b033 dar: c00000005e980000 dsisr: 40000000 current = 0xc000000027af6880 paca = 0xc00000003ffd5200 irqmask: 0x03 irq_happened: 0x01 pid = 5167, comm = mount <...> enter ? for help [c000000015db7380] c000000000782708 ext4_ext_clear_bb+0x378/0x410 [c000000015db7400] c000000000813f14 ext4_fc_replay+0x1794/0x2000 [c000000015db7580] c000000000833f7c do_one_pass+0xe9c/0x12a0 [c000000015db7710] c000000000834504 jbd2_journal_recover+0x184/0x2d0 [c000000015db77c0] c000000000841398 jbd2_journal_load+0x188/0x4a0 [c000000015db7880] c000000000804de8 ext4_fill_super+0x2638/0x3e10 [c000000015db7a40] c0000000005f8404 get_tree_bdev+0x2b4/0x350 [c000000015db7ae0] c0000000007ef058 ext4_get_tree+0x28/0x40 [c000000015db7b00] c0000000005f6344 vfs_get_tree+0x44/0x100 [c000000015db7b70] c00000000063c408 path_mount+0xdd8/0xe70 [c000000015db7c40] c00000000063c8f0 sys_mount+0x450/0x550 [c000000015db7d50] c000000000035770 system_call_exception+0x4a0/0x4e0 [c000000015db7e10] c00000000000c74c system_call_common+0xec/0x250 Signed-off-by: Ritesh Harjani Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/2609bc8f66fc15870616ee416a18a3d392a209c4.1644992609.git.riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 131 +++++++++++++++++++++++++++------------------- 1 file changed, 76 insertions(+), 55 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 
b8ffbc0ebe14e..816322eddd2bd 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3901,72 +3901,93 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block, ext4_grpblk_t blkoff; int i, err; int already; - unsigned int clen, clen_changed; + unsigned int clen, clen_changed, thisgrp_len; - clen = EXT4_NUM_B2C(sbi, len); - - ext4_get_group_no_and_offset(sb, block, &group, &blkoff); - bitmap_bh = ext4_read_block_bitmap(sb, group); - if (IS_ERR(bitmap_bh)) { - err = PTR_ERR(bitmap_bh); - bitmap_bh = NULL; - goto out_err; - } - - err = -EIO; - gdp = ext4_get_group_desc(sb, group, &gdp_bh); - if (!gdp) - goto out_err; + while (len > 0) { + ext4_get_group_no_and_offset(sb, block, &group, &blkoff); - ext4_lock_group(sb, group); - already = 0; - for (i = 0; i < clen; i++) - if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) == !state) - already++; - - clen_changed = clen - already; - if (state) - ext4_set_bits(bitmap_bh->b_data, blkoff, clen); - else - mb_test_and_clear_bits(bitmap_bh->b_data, blkoff, clen); - if (ext4_has_group_desc_csum(sb) && - (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { - gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); - ext4_free_group_clusters_set(sb, gdp, - ext4_free_clusters_after_init(sb, - group, gdp)); - } - if (state) - clen = ext4_free_group_clusters(sb, gdp) - clen_changed; - else - clen = ext4_free_group_clusters(sb, gdp) + clen_changed; + /* + * Check to see if we are freeing blocks across a group + * boundary. + * In case of flex_bg, this can happen that (block, len) may + * span across more than one group. In that case we need to + * get the corresponding group metadata to work with. + * For this we have goto again loop. 
+ */ + thisgrp_len = min_t(unsigned int, (unsigned int)len, + EXT4_BLOCKS_PER_GROUP(sb) - EXT4_C2B(sbi, blkoff)); + clen = EXT4_NUM_B2C(sbi, thisgrp_len); - ext4_free_group_clusters_set(sb, gdp, clen); - ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh); - ext4_group_desc_csum_set(sb, group, gdp); + bitmap_bh = ext4_read_block_bitmap(sb, group); + if (IS_ERR(bitmap_bh)) { + err = PTR_ERR(bitmap_bh); + bitmap_bh = NULL; + break; + } - ext4_unlock_group(sb, group); + err = -EIO; + gdp = ext4_get_group_desc(sb, group, &gdp_bh); + if (!gdp) + break; - if (sbi->s_log_groups_per_flex) { - ext4_group_t flex_group = ext4_flex_group(sbi, group); - struct flex_groups *fg = sbi_array_rcu_deref(sbi, - s_flex_groups, flex_group); + ext4_lock_group(sb, group); + already = 0; + for (i = 0; i < clen; i++) + if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) == + !state) + already++; + clen_changed = clen - already; if (state) - atomic64_sub(clen_changed, &fg->free_clusters); + ext4_set_bits(bitmap_bh->b_data, blkoff, clen); else - atomic64_add(clen_changed, &fg->free_clusters); + mb_test_and_clear_bits(bitmap_bh->b_data, blkoff, clen); + if (ext4_has_group_desc_csum(sb) && + (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); + ext4_free_group_clusters_set(sb, gdp, + ext4_free_clusters_after_init(sb, group, gdp)); + } + if (state) + clen = ext4_free_group_clusters(sb, gdp) - clen_changed; + else + clen = ext4_free_group_clusters(sb, gdp) + clen_changed; + + ext4_free_group_clusters_set(sb, gdp, clen); + ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh); + ext4_group_desc_csum_set(sb, group, gdp); + + ext4_unlock_group(sb, group); + + if (sbi->s_log_groups_per_flex) { + ext4_group_t flex_group = ext4_flex_group(sbi, group); + struct flex_groups *fg = sbi_array_rcu_deref(sbi, + s_flex_groups, flex_group); + + if (state) + atomic64_sub(clen_changed, &fg->free_clusters); + else + atomic64_add(clen_changed, 
&fg->free_clusters); + + } + + err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh); + if (err) + break; + sync_dirty_buffer(bitmap_bh); + err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh); + sync_dirty_buffer(gdp_bh); + if (err) + break; + + block += thisgrp_len; + len -= thisgrp_len; + brelse(bitmap_bh); + BUG_ON(len < 0); } - err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh); if (err) - goto out_err; - sync_dirty_buffer(bitmap_bh); - err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh); - sync_dirty_buffer(gdp_bh); - -out_err: - brelse(bitmap_bh); + brelse(bitmap_bh); } /* -- GitLab From 8ac3939db99f99667b8eb670cf4baf292896e72d Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Wed, 16 Feb 2022 12:32:45 +0530 Subject: [PATCH 0681/1586] ext4: refactor ext4_free_blocks() to pull out ext4_mb_clear_bb() ext4_free_blocks() function became too long and confusing, this patch just pulls out the ext4_mb_clear_bb() function logic from it which clears the block bitmap and frees it. No functionality change in this patch Signed-off-by: Ritesh Harjani Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/22c30fbb26ba409cf8aa5f0c7912970272c459e8.1644992610.git.riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 180 ++++++++++++++++++++++++++-------------------- 1 file changed, 102 insertions(+), 78 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 816322eddd2bd..7b80c5dd9f40f 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -5872,7 +5872,8 @@ static void ext4_free_blocks_simple(struct inode *inode, ext4_fsblk_t block, } /** - * ext4_free_blocks() -- Free given blocks and update quota + * ext4_mb_clear_bb() -- helper function for freeing blocks. 
+ * Used by ext4_free_blocks() * @handle: handle for this transaction * @inode: inode * @bh: optional buffer of the block to be freed @@ -5880,9 +5881,9 @@ static void ext4_free_blocks_simple(struct inode *inode, ext4_fsblk_t block, * @count: number of blocks to be freed * @flags: flags used by ext4_free_blocks */ -void ext4_free_blocks(handle_t *handle, struct inode *inode, - struct buffer_head *bh, ext4_fsblk_t block, - unsigned long count, int flags) +static void ext4_mb_clear_bb(handle_t *handle, struct inode *inode, + ext4_fsblk_t block, unsigned long count, + int flags) { struct buffer_head *bitmap_bh = NULL; struct super_block *sb = inode->i_sb; @@ -5899,80 +5900,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, sbi = EXT4_SB(sb); - if (sbi->s_mount_state & EXT4_FC_REPLAY) { - ext4_free_blocks_simple(inode, block, count); - return; - } - - might_sleep(); - if (bh) { - if (block) - BUG_ON(block != bh->b_blocknr); - else - block = bh->b_blocknr; - } - - if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && - !ext4_inode_block_valid(inode, block, count)) { - ext4_error(sb, "Freeing blocks not in datazone - " - "block = %llu, count = %lu", block, count); - goto error_return; - } - - ext4_debug("freeing block %llu\n", block); - trace_ext4_free_blocks(inode, block, count, flags); - - if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { - BUG_ON(count > 1); - - ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, - inode, bh, block); - } - - /* - * If the extent to be freed does not begin on a cluster - * boundary, we need to deal with partial clusters at the - * beginning and end of the extent. Normally we will free - * blocks at the beginning or the end unless we are explicitly - * requested to avoid doing so. 
- */ - overflow = EXT4_PBLK_COFF(sbi, block); - if (overflow) { - if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) { - overflow = sbi->s_cluster_ratio - overflow; - block += overflow; - if (count > overflow) - count -= overflow; - else - return; - } else { - block -= overflow; - count += overflow; - } - } - overflow = EXT4_LBLK_COFF(sbi, count); - if (overflow) { - if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) { - if (count > overflow) - count -= overflow; - else - return; - } else - count += sbi->s_cluster_ratio - overflow; - } - - if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { - int i; - int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA; - - for (i = 0; i < count; i++) { - cond_resched(); - if (is_metadata) - bh = sb_find_get_block(inode->i_sb, block + i); - ext4_forget(handle, is_metadata, inode, bh, block + i); - } - } - do_more: overflow = 0; ext4_get_group_no_and_offset(sb, block, &block_group, &bit); @@ -6140,6 +6067,103 @@ error_return: return; } +/** + * ext4_free_blocks() -- Free given blocks and update quota + * @handle: handle for this transaction + * @inode: inode + * @bh: optional buffer of the block to be freed + * @block: starting physical block to be freed + * @count: number of blocks to be freed + * @flags: flags used by ext4_free_blocks + */ +void ext4_free_blocks(handle_t *handle, struct inode *inode, + struct buffer_head *bh, ext4_fsblk_t block, + unsigned long count, int flags) +{ + struct super_block *sb = inode->i_sb; + unsigned int overflow; + struct ext4_sb_info *sbi; + + sbi = EXT4_SB(sb); + + if (sbi->s_mount_state & EXT4_FC_REPLAY) { + ext4_free_blocks_simple(inode, block, count); + return; + } + + might_sleep(); + if (bh) { + if (block) + BUG_ON(block != bh->b_blocknr); + else + block = bh->b_blocknr; + } + + if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && + !ext4_inode_block_valid(inode, block, count)) { + ext4_error(sb, "Freeing blocks not in datazone - " + "block = %llu, count = %lu", block, count); + return; + } + + 
ext4_debug("freeing block %llu\n", block); + trace_ext4_free_blocks(inode, block, count, flags); + + if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { + BUG_ON(count > 1); + + ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, + inode, bh, block); + } + + /* + * If the extent to be freed does not begin on a cluster + * boundary, we need to deal with partial clusters at the + * beginning and end of the extent. Normally we will free + * blocks at the beginning or the end unless we are explicitly + * requested to avoid doing so. + */ + overflow = EXT4_PBLK_COFF(sbi, block); + if (overflow) { + if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) { + overflow = sbi->s_cluster_ratio - overflow; + block += overflow; + if (count > overflow) + count -= overflow; + else + return; + } else { + block -= overflow; + count += overflow; + } + } + overflow = EXT4_LBLK_COFF(sbi, count); + if (overflow) { + if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) { + if (count > overflow) + count -= overflow; + else + return; + } else + count += sbi->s_cluster_ratio - overflow; + } + + if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { + int i; + int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA; + + for (i = 0; i < count; i++) { + cond_resched(); + if (is_metadata) + bh = sb_find_get_block(inode->i_sb, block + i); + ext4_forget(handle, is_metadata, inode, bh, block + i); + } + } + + ext4_mb_clear_bb(handle, inode, block, count, flags); + return; +} + /** * ext4_group_add_blocks() -- Add given blocks to an existing group * @handle: handle to this transaction -- GitLab From dbaafbadc5c3dad4010099d0ff135204a8dbff49 Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Wed, 16 Feb 2022 12:32:46 +0530 Subject: [PATCH 0682/1586] ext4: use in_range() for range checking in ext4_fc_replay_check_excluded Instead of open coding it, use in_range() function instead. 
Signed-off-by: Ritesh Harjani Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/8e5526ef14150778871ac7c937c8993c6a09cd3e.1644992610.git.riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 7964ee34e322a..3c5baca38767b 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1875,8 +1875,8 @@ bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t blk) if (state->fc_regions[i].ino == 0 || state->fc_regions[i].len == 0) continue; - if (blk >= state->fc_regions[i].pblk && - blk < state->fc_regions[i].pblk + state->fc_regions[i].len) + if (in_range(blk, state->fc_regions[i].pblk, + state->fc_regions[i].len)) return true; } return false; -- GitLab From 123e3016ee9b3674a819537bc4c3174e25cd48fc Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Wed, 16 Feb 2022 12:32:47 +0530 Subject: [PATCH 0683/1586] ext4: rename ext4_set_bits to mb_set_bits ext4_set_bits() should actually be mb_set_bits() for uniform API naming convention. 
This is via below cmd - grep -nr "ext4_set_bits" fs/ext4/ | cut -d ":" -f 1 | xargs sed -i 's/ext4_set_bits/mb_set_bits/g' Signed-off-by: Ritesh Harjani Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/f1f6ece1405b76a7a987e9145d1adfaf71e30695.1644992610.git.riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 2 +- fs/ext4/mballoc.c | 14 +++++++------- fs/ext4/resize.c | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index bcd3b9bf8069b..97c85ae185a92 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1279,7 +1279,7 @@ struct ext4_inode_info { #define ext4_find_next_zero_bit find_next_zero_bit_le #define ext4_find_next_bit find_next_bit_le -extern void ext4_set_bits(void *bm, int cur, int len); +extern void mb_set_bits(void *bm, int cur, int len); /* * Maximal mount counts between two filesystem checks diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 7b80c5dd9f40f..c2a6f924b4569 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1689,7 +1689,7 @@ static int mb_test_and_clear_bits(void *bm, int cur, int len) return zero_bit; } -void ext4_set_bits(void *bm, int cur, int len) +void mb_set_bits(void *bm, int cur, int len) { __u32 *addr; @@ -1996,7 +1996,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex) mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info); mb_update_avg_fragment_size(e4b->bd_sb, e4b->bd_info); - ext4_set_bits(e4b->bd_bitmap, ex->fe_start, len0); + mb_set_bits(e4b->bd_bitmap, ex->fe_start, len0); mb_check_buddy(e4b); return ret; @@ -3825,7 +3825,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, * We leak some of the blocks here. 
*/ ext4_lock_group(sb, ac->ac_b_ex.fe_group); - ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, + mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); ext4_unlock_group(sb, ac->ac_b_ex.fe_group); err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); @@ -3844,7 +3844,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, } } #endif - ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, + mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); if (ext4_has_group_desc_csum(sb) && (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { @@ -3939,7 +3939,7 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block, clen_changed = clen - already; if (state) - ext4_set_bits(bitmap_bh->b_data, blkoff, clen); + mb_set_bits(bitmap_bh->b_data, blkoff, clen); else mb_test_and_clear_bits(bitmap_bh->b_data, blkoff, clen); if (ext4_has_group_desc_csum(sb) && @@ -4459,7 +4459,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, while (n) { entry = rb_entry(n, struct ext4_free_data, efd_node); - ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count); + mb_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count); n = rb_next(n); } return; @@ -4500,7 +4500,7 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, if (unlikely(len == 0)) continue; BUG_ON(groupnr != group); - ext4_set_bits(bitmap, start, len); + mb_set_bits(bitmap, start, len); preallocated += len; } mb_debug(sb, "preallocated %d for group %u\n", preallocated, group); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index ee8f02f406cb6..f507f34be602f 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -483,7 +483,7 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle, } ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n", first_cluster, first_cluster - start, count2); - ext4_set_bits(bh->b_data, first_cluster - start, count2); + mb_set_bits(bh->b_data, 
first_cluster - start, count2); err = ext4_handle_dirty_metadata(handle, NULL, bh); brelse(bh); @@ -632,7 +632,7 @@ handle_bb: if (overhead != 0) { ext4_debug("mark backup superblock %#04llx (+0)\n", start); - ext4_set_bits(bh->b_data, 0, + mb_set_bits(bh->b_data, 0, EXT4_NUM_B2C(sbi, overhead)); } ext4_mark_bitmap_end(EXT4_B2C(sbi, group_data[i].blocks_count), -- GitLab From bd8247eee1a2b22e2270b3933ab8dca9316b3718 Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Wed, 16 Feb 2022 12:32:48 +0530 Subject: [PATCH 0684/1586] ext4: no need to test for block bitmap bits in ext4_mb_mark_bb() We don't need the return value of mb_test_and_clear_bits() in ext4_mb_mark_bb() So simply use mb_clear_bits() instead. Signed-off-by: Ritesh Harjani Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/a971935306dafb124da0193c7dad1aa485210b62.1644992610.git.riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index c2a6f924b4569..bb3cfcd545cee 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3941,7 +3941,7 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block, if (state) mb_set_bits(bitmap_bh->b_data, blkoff, clen); else - mb_test_and_clear_bits(bitmap_bh->b_data, blkoff, clen); + mb_clear_bits(bitmap_bh->b_data, blkoff, clen); if (ext4_has_group_desc_csum(sb) && (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); -- GitLab From 6bc6c2bdf1baca6522b8d9ba976257d722423085 Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Wed, 16 Feb 2022 12:32:49 +0530 Subject: [PATCH 0685/1586] ext4: add ext4_sb_block_valid() refactored out of ext4_inode_block_valid() This API will be needed at places where we don't have an inode for e.g. 
while freeing blocks in ext4_group_add_blocks() Suggested-by: Jan Kara Signed-off-by: Ritesh Harjani Link: https://lore.kernel.org/r/dd34a236543ad5ae7123eeebe0cb69e6bdd44f34.1644992610.git.riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o --- fs/ext4/block_validity.c | 26 +++++++++++++++++--------- fs/ext4/ext4.h | 3 +++ 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 4666b55b736ec..5504f72bbbbe7 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -292,15 +292,10 @@ void ext4_release_system_zone(struct super_block *sb) call_rcu(&system_blks->rcu, ext4_destroy_system_zone); } -/* - * Returns 1 if the passed-in block region (start_blk, - * start_blk+count) is valid; 0 if some part of the block region - * overlaps with some other filesystem metadata blocks. - */ -int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk, - unsigned int count) +int ext4_sb_block_valid(struct super_block *sb, struct inode *inode, + ext4_fsblk_t start_blk, unsigned int count) { - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_system_blocks *system_blks; struct ext4_system_zone *entry; struct rb_node *n; @@ -329,7 +324,9 @@ int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk, else if (start_blk >= (entry->start_blk + entry->count)) n = n->rb_right; else { - ret = (entry->ino == inode->i_ino); + ret = 0; + if (inode) + ret = (entry->ino == inode->i_ino); break; } } @@ -338,6 +335,17 @@ out_rcu: return ret; } +/* + * Returns 1 if the passed-in block region (start_blk, + * start_blk+count) is valid; 0 if some part of the block region + * overlaps with some other filesystem metadata blocks. 
+ */ +int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk, + unsigned int count) +{ + return ext4_sb_block_valid(inode->i_sb, inode, start_blk, count); +} + int ext4_check_blockref(const char *function, unsigned int line, struct inode *inode, __le32 *p, unsigned int max) { diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 97c85ae185a92..0d4f284c0514e 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3707,6 +3707,9 @@ extern int ext4_inode_block_valid(struct inode *inode, unsigned int count); extern int ext4_check_blockref(const char *, unsigned int, struct inode *, __le32 *, unsigned int); +extern int ext4_sb_block_valid(struct super_block *sb, struct inode *inode, + ext4_fsblk_t start_blk, unsigned int count); + /* extents.c */ struct ext4_ext_path; -- GitLab From a00b482b82fb098956a5bed22bd7873e56f152f1 Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Wed, 16 Feb 2022 12:32:50 +0530 Subject: [PATCH 0686/1586] ext4: add strict range checks while freeing blocks Currently ext4_mb_clear_bb() & ext4_group_add_blocks() only check whether the given block range (which is to be freed) belongs to any FS metadata blocks of the block's respective block group. But to detect any FS error early, it is better to add stricter checks in those functions, which check whether the given blocks belong to any critical FS metadata within the system zone.
Suggested-by: Jan Kara Signed-off-by: Ritesh Harjani Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/ddd9143d064774e32d6364a99667817c6e8bfdc0.1644992610.git.riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index bb3cfcd545cee..0a95bdb1e07ba 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -5930,13 +5930,7 @@ do_more: goto error_return; } - if (in_range(ext4_block_bitmap(sb, gdp), block, count) || - in_range(ext4_inode_bitmap(sb, gdp), block, count) || - in_range(block, ext4_inode_table(sb, gdp), - sbi->s_itb_per_group) || - in_range(block + count - 1, ext4_inode_table(sb, gdp), - sbi->s_itb_per_group)) { - + if (!ext4_inode_block_valid(inode, block, count)) { ext4_error(sb, "Freeing blocks in system zone - " "Block = %llu, count = %lu", block, count); /* err = 0. ext4_std_error should be a no op */ @@ -6007,7 +6001,7 @@ do_more: NULL); if (err && err != -EOPNOTSUPP) ext4_msg(sb, KERN_WARNING, "discard request in" - " group:%d block:%d count:%lu failed" + " group:%u block:%d count:%lu failed" " with %d", block_group, bit, count, err); } else @@ -6220,11 +6214,7 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, goto error_return; } - if (in_range(ext4_block_bitmap(sb, desc), block, count) || - in_range(ext4_inode_bitmap(sb, desc), block, count) || - in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || - in_range(block + count - 1, ext4_inode_table(sb, desc), - sbi->s_itb_per_group)) { + if (!ext4_sb_block_valid(sb, NULL, block, count)) { ext4_error(sb, "Adding blocks in system zones - " "Block = %llu, count = %lu", block, count); -- GitLab From 8c91c57907d3ad8f88a12097213bb0920eb453b8 Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Wed, 16 Feb 2022 12:32:51 +0530 Subject: [PATCH 0687/1586] ext4: add extra check in ext4_mb_mark_bb() to prevent against possible 
corruption This patch adds an extra check in the ext4_mb_mark_bb() function to make sure we mark & report an error if we were to mark/clear any of the critical FS metadata specific bitmaps (& bail out), to prevent any accidental corruption. Suggested-by: Jan Kara Signed-off-by: Ritesh Harjani Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/53cbb6f2573db162a57f935365050d8b1df202ee.1644992610.git.riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 0a95bdb1e07ba..5f0bc6d0aabe0 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3918,6 +3918,14 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block, EXT4_BLOCKS_PER_GROUP(sb) - EXT4_C2B(sbi, blkoff)); clen = EXT4_NUM_B2C(sbi, thisgrp_len); + if (!ext4_sb_block_valid(sb, NULL, block, thisgrp_len)) { + ext4_error(sb, "Marking blocks in system zone - " + "Block = %llu, len = %u", + block, thisgrp_len); + bitmap_bh = NULL; + break; + } + bitmap_bh = ext4_read_block_bitmap(sb, group); if (IS_ERR(bitmap_bh)) { err = PTR_ERR(bitmap_bh); -- GitLab From 575d6b77fa2697afd2b3a443f7f879faa65ae0ca Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 19 Feb 2022 17:20:02 -0800 Subject: [PATCH 0688/1586] m68k: Implement "current_stack_pointer" To follow the existing per-arch conventions, add asm "sp" as "current_stack_pointer". This will let it be used in non-arch places (like HARDENED_USERCOPY).
Cc: linux-m68k@lists.linux-m68k.org Signed-off-by: Kees Cook Acked-by: Geert Uytterhoeven Link: https://lore.kernel.org/lkml/CAMuHMdU6msvi0j=mS28GFYbm+uMRk7PkYe+zOM4sDmOVxeibLQ@mail.gmail.com --- arch/m68k/Kconfig | 1 + arch/m68k/include/asm/current.h | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 936e1803c7c7d..f0eac0e2f1237 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -4,6 +4,7 @@ config M68K default y select ARCH_32BIT_OFF_T select ARCH_HAS_BINFMT_FLAT + select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DMA_PREP_COHERENT if HAS_DMA && MMU && !COLDFIRE select ARCH_HAS_SYNC_DMA_FOR_DEVICE if HAS_DMA select ARCH_HAVE_NMI_SAFE_CMPXCHG if RMW_INSNS diff --git a/arch/m68k/include/asm/current.h b/arch/m68k/include/asm/current.h index 6390ef2f7f864..c117907e12765 100644 --- a/arch/m68k/include/asm/current.h +++ b/arch/m68k/include/asm/current.h @@ -24,6 +24,8 @@ static inline struct task_struct *get_current(void) #define current get_current() -#endif /* CONFNIG_MMU */ +#endif /* CONFIG_MMU */ + +register unsigned long current_stack_pointer __asm__("sp"); #endif /* !(_M68K_CURRENT_H) */ -- GitLab From 617f55e20743fc50c989b498f9dee289eb644cfd Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 16 Feb 2022 14:17:49 -0800 Subject: [PATCH 0689/1586] lib: overflow: Convert to Kunit Convert overflow unit tests to KUnit, for better integration into the kernel self test framework. Includes a rename of test_overflow.c to overflow_kunit.c, and CONFIG_TEST_OVERFLOW to CONFIG_OVERFLOW_KUNIT_TEST. $ ./tools/testing/kunit/kunit.py run overflow ... [14:33:51] Starting KUnit Kernel (1/1)... 
[14:33:51] ============================================================ [14:33:51] ================== overflow (11 subtests) ================== [14:33:51] [PASSED] u8_overflow_test [14:33:51] [PASSED] s8_overflow_test [14:33:51] [PASSED] u16_overflow_test [14:33:51] [PASSED] s16_overflow_test [14:33:51] [PASSED] u32_overflow_test [14:33:51] [PASSED] s32_overflow_test [14:33:51] [PASSED] u64_overflow_test [14:33:51] [PASSED] s64_overflow_test [14:33:51] [PASSED] overflow_shift_test [14:33:51] [PASSED] overflow_allocation_test [14:33:51] [PASSED] overflow_size_helpers_test [14:33:51] ==================== [PASSED] overflow ===================== [14:33:51] ============================================================ [14:33:51] Testing complete. Passed: 11, Failed: 0, Crashed: 0, Skipped: 0, Errors: 0 [14:33:51] Elapsed time: 12.525s total, 0.001s configuring, 12.402s building, 0.101s running Cc: Rasmus Villemoes Cc: Nick Desaulniers Co-developed-by: Vitor Massaru Iha Signed-off-by: Vitor Massaru Iha Link: https://lore.kernel.org/lkml/20200720224418.200495-1-vitor@massaru.org/ Co-developed-by: Daniel Latypov Signed-off-by: Daniel Latypov Link: https://lore.kernel.org/linux-kselftest/20210503211536.1384578-1-dlatypov@google.com/ Acked-by: Nick Desaulniers Link: https://lore.kernel.org/lkml/CAKwvOdm62iA1dNiC6Q11UJ-MnTqtc4kXkm-ubPaFMK824_k0nw@mail.gmail.com Signed-off-by: Kees Cook Reviewed-by: David Gow Link: https://lore.kernel.org/lkml/CABVgOS=TWVh649_Vjo3wnMu9gZnq66gkV-LtGgsksAWMqc+MSA@mail.gmail.com --- lib/Kconfig.debug | 16 +- lib/Makefile | 2 +- lib/{test_overflow.c => overflow_kunit.c} | 554 ++++++++++------------ 3 files changed, 263 insertions(+), 309 deletions(-) rename lib/{test_overflow.c => overflow_kunit.c} (54%) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 14b89aa37c5c9..14d90d03bc8d9 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2214,9 +2214,6 @@ config TEST_UUID config TEST_XARRAY tristate "Test the XArray code at runtime" 
-config TEST_OVERFLOW - tristate "Test check_*_overflow() functions at runtime" - config TEST_RHASHTABLE tristate "Perform selftest on resizable hash table" help @@ -2501,6 +2498,19 @@ config MEMCPY_KUNIT_TEST If unsure, say N. +config OVERFLOW_KUNIT_TEST + tristate "Test check_*_overflow() functions at runtime" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + Builds unit tests for the check_*_overflow(), size_*(), allocation, and + related functions. + + For more information on KUnit and unit tests in general please refer + to the KUnit documentation in Documentation/dev-tools/kunit/. + + If unsure, say N. + config TEST_UDELAY tristate "udelay test driver" help diff --git a/lib/Makefile b/lib/Makefile index 300f569c626b0..fdfcbfaff32f1 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -77,7 +77,6 @@ obj-$(CONFIG_TEST_LIST_SORT) += test_list_sort.o obj-$(CONFIG_TEST_MIN_HEAP) += test_min_heap.o obj-$(CONFIG_TEST_LKM) += test_module.o obj-$(CONFIG_TEST_VMALLOC) += test_vmalloc.o -obj-$(CONFIG_TEST_OVERFLOW) += test_overflow.o obj-$(CONFIG_TEST_RHASHTABLE) += test_rhashtable.o obj-$(CONFIG_TEST_SORT) += test_sort.o obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o @@ -363,6 +362,7 @@ obj-$(CONFIG_BITS_TEST) += test_bits.o obj-$(CONFIG_CMDLINE_KUNIT_TEST) += cmdline_kunit.o obj-$(CONFIG_SLUB_KUNIT_TEST) += slub_kunit.o obj-$(CONFIG_MEMCPY_KUNIT_TEST) += memcpy_kunit.o +obj-$(CONFIG_OVERFLOW_KUNIT_TEST) += overflow_kunit.o obj-$(CONFIG_GENERIC_LIB_DEVMEM_IS_ALLOWED) += devmem_is_allowed.o diff --git a/lib/test_overflow.c b/lib/overflow_kunit.c similarity index 54% rename from lib/test_overflow.c rename to lib/overflow_kunit.c index f6530fce799db..475f0c064bf65 100644 --- a/lib/test_overflow.c +++ b/lib/overflow_kunit.c @@ -1,11 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /* - * Test cases for arithmetic overflow checks. + * Test cases for arithmetic overflow checks. 
See: + * https://www.kernel.org/doc/html/latest/dev-tools/kunit/kunit-tool.html#configuring-building-and-running-tests + * ./tools/testing/kunit/kunit.py run overflow [--raw_output] */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include -#include #include #include #include @@ -19,7 +21,7 @@ t a, b; \ t sum, diff, prod; \ bool s_of, d_of, p_of; \ - } t ## _tests[] __initconst + } t ## _tests[] DEFINE_TEST_ARRAY(u8) = { {0, 0, 0, 0, 0, false, false, false}, @@ -220,43 +222,31 @@ DEFINE_TEST_ARRAY(s64) = { bool _of; \ \ _of = check_ ## op ## _overflow(a, b, &_r); \ - if (_of != of) { \ - pr_warn("expected "fmt" "sym" "fmt \ - " to%s overflow (type %s)\n", \ - a, b, of ? "" : " not", #t); \ - err = 1; \ - } \ - if (_r != r) { \ - pr_warn("expected "fmt" "sym" "fmt" == " \ - fmt", got "fmt" (type %s)\n", \ - a, b, r, _r, #t); \ - err = 1; \ - } \ + KUNIT_EXPECT_EQ_MSG(test, _of, of, \ + "expected "fmt" "sym" "fmt" to%s overflow (type %s)\n", \ + a, b, of ? "" : " not", #t); \ + KUNIT_EXPECT_EQ_MSG(test, _r, r, \ + "expected "fmt" "sym" "fmt" == "fmt", got "fmt" (type %s)\n", \ + a, b, r, _r, #t); \ } while (0) #define DEFINE_TEST_FUNC(t, fmt) \ -static int __init do_test_ ## t(const struct test_ ## t *p) \ +static void do_test_ ## t(struct kunit *test, const struct test_ ## t *p) \ { \ - int err = 0; \ - \ check_one_op(t, fmt, add, "+", p->a, p->b, p->sum, p->s_of); \ check_one_op(t, fmt, add, "+", p->b, p->a, p->sum, p->s_of); \ check_one_op(t, fmt, sub, "-", p->a, p->b, p->diff, p->d_of); \ check_one_op(t, fmt, mul, "*", p->a, p->b, p->prod, p->p_of); \ check_one_op(t, fmt, mul, "*", p->b, p->a, p->prod, p->p_of); \ - \ - return err; \ } \ \ -static int __init test_ ## t ## _overflow(void) { \ - int err = 0; \ +static void t ## _overflow_test(struct kunit *test) { \ unsigned i; \ \ for (i = 0; i < ARRAY_SIZE(t ## _tests); ++i) \ - err |= do_test_ ## t(&t ## _tests[i]); \ - pr_info("%zu %s arithmetic tests finished\n", \ + do_test_ ## t(test, &t ## 
_tests[i]); \ + kunit_info(test, "%zu %s arithmetic tests finished\n", \ ARRAY_SIZE(t ## _tests), #t); \ - return err; \ } DEFINE_TEST_FUNC(u8, "%d"); @@ -270,198 +260,176 @@ DEFINE_TEST_FUNC(u64, "%llu"); DEFINE_TEST_FUNC(s64, "%lld"); #endif -static int __init test_overflow_calculation(void) +static void overflow_shift_test(struct kunit *test) { - int err = 0; - - err |= test_u8_overflow(); - err |= test_s8_overflow(); - err |= test_u16_overflow(); - err |= test_s16_overflow(); - err |= test_u32_overflow(); - err |= test_s32_overflow(); -#if BITS_PER_LONG == 64 - err |= test_u64_overflow(); - err |= test_s64_overflow(); -#endif - - return err; -} - -static int __init test_overflow_shift(void) -{ - int err = 0; int count = 0; /* Args are: value, shift, type, expected result, overflow expected */ -#define TEST_ONE_SHIFT(a, s, t, expect, of) ({ \ - int __failed = 0; \ +#define TEST_ONE_SHIFT(a, s, t, expect, of) do { \ typeof(a) __a = (a); \ typeof(s) __s = (s); \ t __e = (expect); \ t __d; \ bool __of = check_shl_overflow(__a, __s, &__d); \ if (__of != of) { \ - pr_warn("expected (%s)(%s << %s) to%s overflow\n", \ + KUNIT_EXPECT_EQ_MSG(test, __of, of, \ + "expected (%s)(%s << %s) to%s overflow\n", \ #t, #a, #s, of ? "" : " not"); \ - __failed = 1; \ } else if (!__of && __d != __e) { \ - pr_warn("expected (%s)(%s << %s) == %s\n", \ + KUNIT_EXPECT_EQ_MSG(test, __d, __e, \ + "expected (%s)(%s << %s) == %s\n", \ #t, #a, #s, #expect); \ if ((t)-1 < 0) \ - pr_warn("got %lld\n", (s64)__d); \ + kunit_info(test, "got %lld\n", (s64)__d); \ else \ - pr_warn("got %llu\n", (u64)__d); \ - __failed = 1; \ + kunit_info(test, "got %llu\n", (u64)__d); \ } \ count++; \ - __failed; \ -}) +} while (0) /* Sane shifts. 
*/ - err |= TEST_ONE_SHIFT(1, 0, u8, 1 << 0, false); - err |= TEST_ONE_SHIFT(1, 4, u8, 1 << 4, false); - err |= TEST_ONE_SHIFT(1, 7, u8, 1 << 7, false); - err |= TEST_ONE_SHIFT(0xF, 4, u8, 0xF << 4, false); - err |= TEST_ONE_SHIFT(1, 0, u16, 1 << 0, false); - err |= TEST_ONE_SHIFT(1, 10, u16, 1 << 10, false); - err |= TEST_ONE_SHIFT(1, 15, u16, 1 << 15, false); - err |= TEST_ONE_SHIFT(0xFF, 8, u16, 0xFF << 8, false); - err |= TEST_ONE_SHIFT(1, 0, int, 1 << 0, false); - err |= TEST_ONE_SHIFT(1, 16, int, 1 << 16, false); - err |= TEST_ONE_SHIFT(1, 30, int, 1 << 30, false); - err |= TEST_ONE_SHIFT(1, 0, s32, 1 << 0, false); - err |= TEST_ONE_SHIFT(1, 16, s32, 1 << 16, false); - err |= TEST_ONE_SHIFT(1, 30, s32, 1 << 30, false); - err |= TEST_ONE_SHIFT(1, 0, unsigned int, 1U << 0, false); - err |= TEST_ONE_SHIFT(1, 20, unsigned int, 1U << 20, false); - err |= TEST_ONE_SHIFT(1, 31, unsigned int, 1U << 31, false); - err |= TEST_ONE_SHIFT(0xFFFFU, 16, unsigned int, 0xFFFFU << 16, false); - err |= TEST_ONE_SHIFT(1, 0, u32, 1U << 0, false); - err |= TEST_ONE_SHIFT(1, 20, u32, 1U << 20, false); - err |= TEST_ONE_SHIFT(1, 31, u32, 1U << 31, false); - err |= TEST_ONE_SHIFT(0xFFFFU, 16, u32, 0xFFFFU << 16, false); - err |= TEST_ONE_SHIFT(1, 0, u64, 1ULL << 0, false); - err |= TEST_ONE_SHIFT(1, 40, u64, 1ULL << 40, false); - err |= TEST_ONE_SHIFT(1, 63, u64, 1ULL << 63, false); - err |= TEST_ONE_SHIFT(0xFFFFFFFFULL, 32, u64, - 0xFFFFFFFFULL << 32, false); + TEST_ONE_SHIFT(1, 0, u8, 1 << 0, false); + TEST_ONE_SHIFT(1, 4, u8, 1 << 4, false); + TEST_ONE_SHIFT(1, 7, u8, 1 << 7, false); + TEST_ONE_SHIFT(0xF, 4, u8, 0xF << 4, false); + TEST_ONE_SHIFT(1, 0, u16, 1 << 0, false); + TEST_ONE_SHIFT(1, 10, u16, 1 << 10, false); + TEST_ONE_SHIFT(1, 15, u16, 1 << 15, false); + TEST_ONE_SHIFT(0xFF, 8, u16, 0xFF << 8, false); + TEST_ONE_SHIFT(1, 0, int, 1 << 0, false); + TEST_ONE_SHIFT(1, 16, int, 1 << 16, false); + TEST_ONE_SHIFT(1, 30, int, 1 << 30, false); + TEST_ONE_SHIFT(1, 0, s32, 1 << 0, 
false); + TEST_ONE_SHIFT(1, 16, s32, 1 << 16, false); + TEST_ONE_SHIFT(1, 30, s32, 1 << 30, false); + TEST_ONE_SHIFT(1, 0, unsigned int, 1U << 0, false); + TEST_ONE_SHIFT(1, 20, unsigned int, 1U << 20, false); + TEST_ONE_SHIFT(1, 31, unsigned int, 1U << 31, false); + TEST_ONE_SHIFT(0xFFFFU, 16, unsigned int, 0xFFFFU << 16, false); + TEST_ONE_SHIFT(1, 0, u32, 1U << 0, false); + TEST_ONE_SHIFT(1, 20, u32, 1U << 20, false); + TEST_ONE_SHIFT(1, 31, u32, 1U << 31, false); + TEST_ONE_SHIFT(0xFFFFU, 16, u32, 0xFFFFU << 16, false); + TEST_ONE_SHIFT(1, 0, u64, 1ULL << 0, false); + TEST_ONE_SHIFT(1, 40, u64, 1ULL << 40, false); + TEST_ONE_SHIFT(1, 63, u64, 1ULL << 63, false); + TEST_ONE_SHIFT(0xFFFFFFFFULL, 32, u64, 0xFFFFFFFFULL << 32, false); /* Sane shift: start and end with 0, without a too-wide shift. */ - err |= TEST_ONE_SHIFT(0, 7, u8, 0, false); - err |= TEST_ONE_SHIFT(0, 15, u16, 0, false); - err |= TEST_ONE_SHIFT(0, 31, unsigned int, 0, false); - err |= TEST_ONE_SHIFT(0, 31, u32, 0, false); - err |= TEST_ONE_SHIFT(0, 63, u64, 0, false); + TEST_ONE_SHIFT(0, 7, u8, 0, false); + TEST_ONE_SHIFT(0, 15, u16, 0, false); + TEST_ONE_SHIFT(0, 31, unsigned int, 0, false); + TEST_ONE_SHIFT(0, 31, u32, 0, false); + TEST_ONE_SHIFT(0, 63, u64, 0, false); /* Sane shift: start and end with 0, without reaching signed bit. */ - err |= TEST_ONE_SHIFT(0, 6, s8, 0, false); - err |= TEST_ONE_SHIFT(0, 14, s16, 0, false); - err |= TEST_ONE_SHIFT(0, 30, int, 0, false); - err |= TEST_ONE_SHIFT(0, 30, s32, 0, false); - err |= TEST_ONE_SHIFT(0, 62, s64, 0, false); + TEST_ONE_SHIFT(0, 6, s8, 0, false); + TEST_ONE_SHIFT(0, 14, s16, 0, false); + TEST_ONE_SHIFT(0, 30, int, 0, false); + TEST_ONE_SHIFT(0, 30, s32, 0, false); + TEST_ONE_SHIFT(0, 62, s64, 0, false); /* Overflow: shifted the bit off the end. 
*/ - err |= TEST_ONE_SHIFT(1, 8, u8, 0, true); - err |= TEST_ONE_SHIFT(1, 16, u16, 0, true); - err |= TEST_ONE_SHIFT(1, 32, unsigned int, 0, true); - err |= TEST_ONE_SHIFT(1, 32, u32, 0, true); - err |= TEST_ONE_SHIFT(1, 64, u64, 0, true); + TEST_ONE_SHIFT(1, 8, u8, 0, true); + TEST_ONE_SHIFT(1, 16, u16, 0, true); + TEST_ONE_SHIFT(1, 32, unsigned int, 0, true); + TEST_ONE_SHIFT(1, 32, u32, 0, true); + TEST_ONE_SHIFT(1, 64, u64, 0, true); /* Overflow: shifted into the signed bit. */ - err |= TEST_ONE_SHIFT(1, 7, s8, 0, true); - err |= TEST_ONE_SHIFT(1, 15, s16, 0, true); - err |= TEST_ONE_SHIFT(1, 31, int, 0, true); - err |= TEST_ONE_SHIFT(1, 31, s32, 0, true); - err |= TEST_ONE_SHIFT(1, 63, s64, 0, true); + TEST_ONE_SHIFT(1, 7, s8, 0, true); + TEST_ONE_SHIFT(1, 15, s16, 0, true); + TEST_ONE_SHIFT(1, 31, int, 0, true); + TEST_ONE_SHIFT(1, 31, s32, 0, true); + TEST_ONE_SHIFT(1, 63, s64, 0, true); /* Overflow: high bit falls off unsigned types. */ /* 10010110 */ - err |= TEST_ONE_SHIFT(150, 1, u8, 0, true); + TEST_ONE_SHIFT(150, 1, u8, 0, true); /* 1000100010010110 */ - err |= TEST_ONE_SHIFT(34966, 1, u16, 0, true); + TEST_ONE_SHIFT(34966, 1, u16, 0, true); /* 10000100000010001000100010010110 */ - err |= TEST_ONE_SHIFT(2215151766U, 1, u32, 0, true); - err |= TEST_ONE_SHIFT(2215151766U, 1, unsigned int, 0, true); + TEST_ONE_SHIFT(2215151766U, 1, u32, 0, true); + TEST_ONE_SHIFT(2215151766U, 1, unsigned int, 0, true); /* 1000001000010000010000000100000010000100000010001000100010010110 */ - err |= TEST_ONE_SHIFT(9372061470395238550ULL, 1, u64, 0, true); + TEST_ONE_SHIFT(9372061470395238550ULL, 1, u64, 0, true); /* Overflow: bit shifted into signed bit on signed types. 
*/ /* 01001011 */ - err |= TEST_ONE_SHIFT(75, 1, s8, 0, true); + TEST_ONE_SHIFT(75, 1, s8, 0, true); /* 0100010001001011 */ - err |= TEST_ONE_SHIFT(17483, 1, s16, 0, true); + TEST_ONE_SHIFT(17483, 1, s16, 0, true); /* 01000010000001000100010001001011 */ - err |= TEST_ONE_SHIFT(1107575883, 1, s32, 0, true); - err |= TEST_ONE_SHIFT(1107575883, 1, int, 0, true); + TEST_ONE_SHIFT(1107575883, 1, s32, 0, true); + TEST_ONE_SHIFT(1107575883, 1, int, 0, true); /* 0100000100001000001000000010000001000010000001000100010001001011 */ - err |= TEST_ONE_SHIFT(4686030735197619275LL, 1, s64, 0, true); + TEST_ONE_SHIFT(4686030735197619275LL, 1, s64, 0, true); /* Overflow: bit shifted past signed bit on signed types. */ /* 01001011 */ - err |= TEST_ONE_SHIFT(75, 2, s8, 0, true); + TEST_ONE_SHIFT(75, 2, s8, 0, true); /* 0100010001001011 */ - err |= TEST_ONE_SHIFT(17483, 2, s16, 0, true); + TEST_ONE_SHIFT(17483, 2, s16, 0, true); /* 01000010000001000100010001001011 */ - err |= TEST_ONE_SHIFT(1107575883, 2, s32, 0, true); - err |= TEST_ONE_SHIFT(1107575883, 2, int, 0, true); + TEST_ONE_SHIFT(1107575883, 2, s32, 0, true); + TEST_ONE_SHIFT(1107575883, 2, int, 0, true); /* 0100000100001000001000000010000001000010000001000100010001001011 */ - err |= TEST_ONE_SHIFT(4686030735197619275LL, 2, s64, 0, true); + TEST_ONE_SHIFT(4686030735197619275LL, 2, s64, 0, true); /* Overflow: values larger than destination type. 
*/ - err |= TEST_ONE_SHIFT(0x100, 0, u8, 0, true); - err |= TEST_ONE_SHIFT(0xFF, 0, s8, 0, true); - err |= TEST_ONE_SHIFT(0x10000U, 0, u16, 0, true); - err |= TEST_ONE_SHIFT(0xFFFFU, 0, s16, 0, true); - err |= TEST_ONE_SHIFT(0x100000000ULL, 0, u32, 0, true); - err |= TEST_ONE_SHIFT(0x100000000ULL, 0, unsigned int, 0, true); - err |= TEST_ONE_SHIFT(0xFFFFFFFFUL, 0, s32, 0, true); - err |= TEST_ONE_SHIFT(0xFFFFFFFFUL, 0, int, 0, true); - err |= TEST_ONE_SHIFT(0xFFFFFFFFFFFFFFFFULL, 0, s64, 0, true); + TEST_ONE_SHIFT(0x100, 0, u8, 0, true); + TEST_ONE_SHIFT(0xFF, 0, s8, 0, true); + TEST_ONE_SHIFT(0x10000U, 0, u16, 0, true); + TEST_ONE_SHIFT(0xFFFFU, 0, s16, 0, true); + TEST_ONE_SHIFT(0x100000000ULL, 0, u32, 0, true); + TEST_ONE_SHIFT(0x100000000ULL, 0, unsigned int, 0, true); + TEST_ONE_SHIFT(0xFFFFFFFFUL, 0, s32, 0, true); + TEST_ONE_SHIFT(0xFFFFFFFFUL, 0, int, 0, true); + TEST_ONE_SHIFT(0xFFFFFFFFFFFFFFFFULL, 0, s64, 0, true); /* Nonsense: negative initial value. */ - err |= TEST_ONE_SHIFT(-1, 0, s8, 0, true); - err |= TEST_ONE_SHIFT(-1, 0, u8, 0, true); - err |= TEST_ONE_SHIFT(-5, 0, s16, 0, true); - err |= TEST_ONE_SHIFT(-5, 0, u16, 0, true); - err |= TEST_ONE_SHIFT(-10, 0, int, 0, true); - err |= TEST_ONE_SHIFT(-10, 0, unsigned int, 0, true); - err |= TEST_ONE_SHIFT(-100, 0, s32, 0, true); - err |= TEST_ONE_SHIFT(-100, 0, u32, 0, true); - err |= TEST_ONE_SHIFT(-10000, 0, s64, 0, true); - err |= TEST_ONE_SHIFT(-10000, 0, u64, 0, true); + TEST_ONE_SHIFT(-1, 0, s8, 0, true); + TEST_ONE_SHIFT(-1, 0, u8, 0, true); + TEST_ONE_SHIFT(-5, 0, s16, 0, true); + TEST_ONE_SHIFT(-5, 0, u16, 0, true); + TEST_ONE_SHIFT(-10, 0, int, 0, true); + TEST_ONE_SHIFT(-10, 0, unsigned int, 0, true); + TEST_ONE_SHIFT(-100, 0, s32, 0, true); + TEST_ONE_SHIFT(-100, 0, u32, 0, true); + TEST_ONE_SHIFT(-10000, 0, s64, 0, true); + TEST_ONE_SHIFT(-10000, 0, u64, 0, true); /* Nonsense: negative shift values. 
*/ - err |= TEST_ONE_SHIFT(0, -5, s8, 0, true); - err |= TEST_ONE_SHIFT(0, -5, u8, 0, true); - err |= TEST_ONE_SHIFT(0, -10, s16, 0, true); - err |= TEST_ONE_SHIFT(0, -10, u16, 0, true); - err |= TEST_ONE_SHIFT(0, -15, int, 0, true); - err |= TEST_ONE_SHIFT(0, -15, unsigned int, 0, true); - err |= TEST_ONE_SHIFT(0, -20, s32, 0, true); - err |= TEST_ONE_SHIFT(0, -20, u32, 0, true); - err |= TEST_ONE_SHIFT(0, -30, s64, 0, true); - err |= TEST_ONE_SHIFT(0, -30, u64, 0, true); + TEST_ONE_SHIFT(0, -5, s8, 0, true); + TEST_ONE_SHIFT(0, -5, u8, 0, true); + TEST_ONE_SHIFT(0, -10, s16, 0, true); + TEST_ONE_SHIFT(0, -10, u16, 0, true); + TEST_ONE_SHIFT(0, -15, int, 0, true); + TEST_ONE_SHIFT(0, -15, unsigned int, 0, true); + TEST_ONE_SHIFT(0, -20, s32, 0, true); + TEST_ONE_SHIFT(0, -20, u32, 0, true); + TEST_ONE_SHIFT(0, -30, s64, 0, true); + TEST_ONE_SHIFT(0, -30, u64, 0, true); /* Overflow: shifted at or beyond entire type's bit width. */ - err |= TEST_ONE_SHIFT(0, 8, u8, 0, true); - err |= TEST_ONE_SHIFT(0, 9, u8, 0, true); - err |= TEST_ONE_SHIFT(0, 8, s8, 0, true); - err |= TEST_ONE_SHIFT(0, 9, s8, 0, true); - err |= TEST_ONE_SHIFT(0, 16, u16, 0, true); - err |= TEST_ONE_SHIFT(0, 17, u16, 0, true); - err |= TEST_ONE_SHIFT(0, 16, s16, 0, true); - err |= TEST_ONE_SHIFT(0, 17, s16, 0, true); - err |= TEST_ONE_SHIFT(0, 32, u32, 0, true); - err |= TEST_ONE_SHIFT(0, 33, u32, 0, true); - err |= TEST_ONE_SHIFT(0, 32, int, 0, true); - err |= TEST_ONE_SHIFT(0, 33, int, 0, true); - err |= TEST_ONE_SHIFT(0, 32, s32, 0, true); - err |= TEST_ONE_SHIFT(0, 33, s32, 0, true); - err |= TEST_ONE_SHIFT(0, 64, u64, 0, true); - err |= TEST_ONE_SHIFT(0, 65, u64, 0, true); - err |= TEST_ONE_SHIFT(0, 64, s64, 0, true); - err |= TEST_ONE_SHIFT(0, 65, s64, 0, true); + TEST_ONE_SHIFT(0, 8, u8, 0, true); + TEST_ONE_SHIFT(0, 9, u8, 0, true); + TEST_ONE_SHIFT(0, 8, s8, 0, true); + TEST_ONE_SHIFT(0, 9, s8, 0, true); + TEST_ONE_SHIFT(0, 16, u16, 0, true); + TEST_ONE_SHIFT(0, 17, u16, 0, true); + 
TEST_ONE_SHIFT(0, 16, s16, 0, true); + TEST_ONE_SHIFT(0, 17, s16, 0, true); + TEST_ONE_SHIFT(0, 32, u32, 0, true); + TEST_ONE_SHIFT(0, 33, u32, 0, true); + TEST_ONE_SHIFT(0, 32, int, 0, true); + TEST_ONE_SHIFT(0, 33, int, 0, true); + TEST_ONE_SHIFT(0, 32, s32, 0, true); + TEST_ONE_SHIFT(0, 33, s32, 0, true); + TEST_ONE_SHIFT(0, 64, u64, 0, true); + TEST_ONE_SHIFT(0, 65, u64, 0, true); + TEST_ONE_SHIFT(0, 64, s64, 0, true); + TEST_ONE_SHIFT(0, 65, s64, 0, true); /* * Corner case: for unsigned types, we fail when we've shifted @@ -472,17 +440,14 @@ static int __init test_overflow_shift(void) * signed bit). So, for now, we will test this condition but * mark it as not expected to overflow. */ - err |= TEST_ONE_SHIFT(0, 7, s8, 0, false); - err |= TEST_ONE_SHIFT(0, 15, s16, 0, false); - err |= TEST_ONE_SHIFT(0, 31, int, 0, false); - err |= TEST_ONE_SHIFT(0, 31, s32, 0, false); - err |= TEST_ONE_SHIFT(0, 63, s64, 0, false); - - pr_info("%d shift tests finished\n", count); + TEST_ONE_SHIFT(0, 7, s8, 0, false); + TEST_ONE_SHIFT(0, 15, s16, 0, false); + TEST_ONE_SHIFT(0, 31, int, 0, false); + TEST_ONE_SHIFT(0, 31, s32, 0, false); + TEST_ONE_SHIFT(0, 63, s64, 0, false); + kunit_info(test, "%d shift tests finished\n", count); #undef TEST_ONE_SHIFT - - return err; } /* @@ -502,7 +467,7 @@ static int __init test_overflow_shift(void) #define TEST_SIZE (5 * 4096) #define DEFINE_TEST_ALLOC(func, free_func, want_arg, want_gfp, want_node)\ -static int __init test_ ## func (void *arg) \ +static void test_ ## func (struct kunit *test, void *arg) \ { \ volatile size_t a = TEST_SIZE; \ volatile size_t b = (SIZE_MAX / TEST_SIZE) + 1; \ @@ -510,30 +475,24 @@ static int __init test_ ## func (void *arg) \ \ /* Tiny allocation test. 
*/ \ ptr = alloc ## want_arg ## want_gfp ## want_node (func, arg, 1);\ - if (!ptr) { \ - pr_warn(#func " failed regular allocation?!\n"); \ - return 1; \ - } \ + KUNIT_ASSERT_NOT_ERR_OR_NULL_MSG(test, ptr, \ + #func " failed regular allocation?!\n"); \ free ## want_arg (free_func, arg, ptr); \ \ /* Wrapped allocation test. */ \ ptr = alloc ## want_arg ## want_gfp ## want_node (func, arg, \ a * b); \ - if (!ptr) { \ - pr_warn(#func " unexpectedly failed bad wrapping?!\n"); \ - return 1; \ - } \ + KUNIT_ASSERT_NOT_ERR_OR_NULL_MSG(test, ptr, \ + #func " unexpectedly failed bad wrapping?!\n"); \ free ## want_arg (free_func, arg, ptr); \ \ /* Saturated allocation test. */ \ ptr = alloc ## want_arg ## want_gfp ## want_node (func, arg, \ array_size(a, b)); \ if (ptr) { \ - pr_warn(#func " missed saturation!\n"); \ + KUNIT_FAIL(test, #func " missed saturation!\n"); \ free ## want_arg (free_func, arg, ptr); \ - return 1; \ } \ - return 0; \ } /* @@ -554,44 +513,38 @@ DEFINE_TEST_ALLOC(kvzalloc_node, kvfree, 0, 1, 1); DEFINE_TEST_ALLOC(devm_kmalloc, devm_kfree, 1, 1, 0); DEFINE_TEST_ALLOC(devm_kzalloc, devm_kfree, 1, 1, 0); -static int __init test_overflow_allocation(void) +static void overflow_allocation_test(struct kunit *test) { const char device_name[] = "overflow-test"; struct device *dev; int count = 0; - int err = 0; -#define check_allocation_overflow(alloc) ({ \ +#define check_allocation_overflow(alloc) do { \ count++; \ - test_ ## alloc(dev); \ -}) + test_ ## alloc(test, dev); \ +} while (0) /* Create dummy device for devm_kmalloc()-family tests. 
*/ dev = root_device_register(device_name); - if (IS_ERR(dev)) { - pr_warn("Cannot register test device\n"); - return 1; - } - - err |= check_allocation_overflow(kmalloc); - err |= check_allocation_overflow(kmalloc_node); - err |= check_allocation_overflow(kzalloc); - err |= check_allocation_overflow(kzalloc_node); - err |= check_allocation_overflow(__vmalloc); - err |= check_allocation_overflow(kvmalloc); - err |= check_allocation_overflow(kvmalloc_node); - err |= check_allocation_overflow(kvzalloc); - err |= check_allocation_overflow(kvzalloc_node); - err |= check_allocation_overflow(devm_kmalloc); - err |= check_allocation_overflow(devm_kzalloc); + KUNIT_ASSERT_FALSE_MSG(test, IS_ERR(dev), + "Cannot register test device\n"); + + check_allocation_overflow(kmalloc); + check_allocation_overflow(kmalloc_node); + check_allocation_overflow(kzalloc); + check_allocation_overflow(kzalloc_node); + check_allocation_overflow(__vmalloc); + check_allocation_overflow(kvmalloc); + check_allocation_overflow(kvmalloc_node); + check_allocation_overflow(kvzalloc); + check_allocation_overflow(kvzalloc_node); + check_allocation_overflow(devm_kmalloc); + check_allocation_overflow(devm_kzalloc); device_unregister(dev); - pr_info("%d allocation overflow tests finished\n", count); - + kunit_info(test, "%d allocation overflow tests finished\n", count); #undef check_allocation_overflow - - return err; } struct __test_flex_array { @@ -600,127 +553,118 @@ struct __test_flex_array { unsigned long data[]; }; -static int __init test_overflow_size_helpers(void) +static void overflow_size_helpers_test(struct kunit *test) { /* Make sure struct_size() can be used in a constant expression. */ u8 ce_array[struct_size((struct __test_flex_array *)0, data, 55)]; struct __test_flex_array *obj; int count = 0; - int err = 0; int var; volatile int unconst = 0; /* Verify constant expression against runtime version. 
*/ var = 55; OPTIMIZER_HIDE_VAR(var); - err |= sizeof(ce_array) != struct_size(obj, data, var); + KUNIT_EXPECT_EQ(test, sizeof(ce_array), struct_size(obj, data, var)); -#define check_one_size_helper(expected, func, args...) ({ \ - bool __failure = false; \ - size_t _r; \ - \ - _r = func(args); \ - if (_r != (expected)) { \ - pr_warn("expected " #func "(" #args ") " \ - "to return %zu but got %zu instead\n", \ - (size_t)(expected), _r); \ - __failure = true; \ - } \ +#define check_one_size_helper(expected, func, args...) do { \ + size_t _r = func(args); \ + KUNIT_EXPECT_EQ_MSG(test, _r, expected, \ + "expected " #func "(" #args ") to return %zu but got %zu instead\n", \ + (size_t)(expected), _r); \ count++; \ - __failure; \ -}) +} while (0) var = 4; - err |= check_one_size_helper(20, size_mul, var++, 5); - err |= check_one_size_helper(20, size_mul, 4, var++); - err |= check_one_size_helper(0, size_mul, 0, 3); - err |= check_one_size_helper(0, size_mul, 3, 0); - err |= check_one_size_helper(6, size_mul, 2, 3); - err |= check_one_size_helper(SIZE_MAX, size_mul, SIZE_MAX, 1); - err |= check_one_size_helper(SIZE_MAX, size_mul, SIZE_MAX, 3); - err |= check_one_size_helper(SIZE_MAX, size_mul, SIZE_MAX, -3); + check_one_size_helper(20, size_mul, var++, 5); + check_one_size_helper(20, size_mul, 4, var++); + check_one_size_helper(0, size_mul, 0, 3); + check_one_size_helper(0, size_mul, 3, 0); + check_one_size_helper(6, size_mul, 2, 3); + check_one_size_helper(SIZE_MAX, size_mul, SIZE_MAX, 1); + check_one_size_helper(SIZE_MAX, size_mul, SIZE_MAX, 3); + check_one_size_helper(SIZE_MAX, size_mul, SIZE_MAX, -3); var = 4; - err |= check_one_size_helper(9, size_add, var++, 5); - err |= check_one_size_helper(9, size_add, 4, var++); - err |= check_one_size_helper(9, size_add, 9, 0); - err |= check_one_size_helper(9, size_add, 0, 9); - err |= check_one_size_helper(5, size_add, 2, 3); - err |= check_one_size_helper(SIZE_MAX, size_add, SIZE_MAX, 1); - err |= 
check_one_size_helper(SIZE_MAX, size_add, SIZE_MAX, 3); - err |= check_one_size_helper(SIZE_MAX, size_add, SIZE_MAX, -3); + check_one_size_helper(9, size_add, var++, 5); + check_one_size_helper(9, size_add, 4, var++); + check_one_size_helper(9, size_add, 9, 0); + check_one_size_helper(9, size_add, 0, 9); + check_one_size_helper(5, size_add, 2, 3); + check_one_size_helper(SIZE_MAX, size_add, SIZE_MAX, 1); + check_one_size_helper(SIZE_MAX, size_add, SIZE_MAX, 3); + check_one_size_helper(SIZE_MAX, size_add, SIZE_MAX, -3); var = 4; - err |= check_one_size_helper(1, size_sub, var--, 3); - err |= check_one_size_helper(1, size_sub, 4, var--); - err |= check_one_size_helper(1, size_sub, 3, 2); - err |= check_one_size_helper(9, size_sub, 9, 0); - err |= check_one_size_helper(SIZE_MAX, size_sub, 9, -3); - err |= check_one_size_helper(SIZE_MAX, size_sub, 0, 9); - err |= check_one_size_helper(SIZE_MAX, size_sub, 2, 3); - err |= check_one_size_helper(SIZE_MAX, size_sub, SIZE_MAX, 0); - err |= check_one_size_helper(SIZE_MAX, size_sub, SIZE_MAX, 10); - err |= check_one_size_helper(SIZE_MAX, size_sub, 0, SIZE_MAX); - err |= check_one_size_helper(SIZE_MAX, size_sub, 14, SIZE_MAX); - err |= check_one_size_helper(SIZE_MAX - 2, size_sub, SIZE_MAX - 1, 1); - err |= check_one_size_helper(SIZE_MAX - 4, size_sub, SIZE_MAX - 1, 3); - err |= check_one_size_helper(1, size_sub, SIZE_MAX - 1, -3); + check_one_size_helper(1, size_sub, var--, 3); + check_one_size_helper(1, size_sub, 4, var--); + check_one_size_helper(1, size_sub, 3, 2); + check_one_size_helper(9, size_sub, 9, 0); + check_one_size_helper(SIZE_MAX, size_sub, 9, -3); + check_one_size_helper(SIZE_MAX, size_sub, 0, 9); + check_one_size_helper(SIZE_MAX, size_sub, 2, 3); + check_one_size_helper(SIZE_MAX, size_sub, SIZE_MAX, 0); + check_one_size_helper(SIZE_MAX, size_sub, SIZE_MAX, 10); + check_one_size_helper(SIZE_MAX, size_sub, 0, SIZE_MAX); + check_one_size_helper(SIZE_MAX, size_sub, 14, SIZE_MAX); + check_one_size_helper(SIZE_MAX - 
2, size_sub, SIZE_MAX - 1, 1); + check_one_size_helper(SIZE_MAX - 4, size_sub, SIZE_MAX - 1, 3); + check_one_size_helper(1, size_sub, SIZE_MAX - 1, -3); var = 4; - err |= check_one_size_helper(4 * sizeof(*obj->data), - flex_array_size, obj, data, var++); - err |= check_one_size_helper(5 * sizeof(*obj->data), - flex_array_size, obj, data, var++); - err |= check_one_size_helper(0, flex_array_size, obj, data, 0 + unconst); - err |= check_one_size_helper(sizeof(*obj->data), - flex_array_size, obj, data, 1 + unconst); - err |= check_one_size_helper(7 * sizeof(*obj->data), - flex_array_size, obj, data, 7 + unconst); - err |= check_one_size_helper(SIZE_MAX, - flex_array_size, obj, data, -1 + unconst); - err |= check_one_size_helper(SIZE_MAX, - flex_array_size, obj, data, SIZE_MAX - 4 + unconst); + check_one_size_helper(4 * sizeof(*obj->data), + flex_array_size, obj, data, var++); + check_one_size_helper(5 * sizeof(*obj->data), + flex_array_size, obj, data, var++); + check_one_size_helper(0, flex_array_size, obj, data, 0 + unconst); + check_one_size_helper(sizeof(*obj->data), + flex_array_size, obj, data, 1 + unconst); + check_one_size_helper(7 * sizeof(*obj->data), + flex_array_size, obj, data, 7 + unconst); + check_one_size_helper(SIZE_MAX, + flex_array_size, obj, data, -1 + unconst); + check_one_size_helper(SIZE_MAX, + flex_array_size, obj, data, SIZE_MAX - 4 + unconst); var = 4; - err |= check_one_size_helper(sizeof(*obj) + (4 * sizeof(*obj->data)), - struct_size, obj, data, var++); - err |= check_one_size_helper(sizeof(*obj) + (5 * sizeof(*obj->data)), - struct_size, obj, data, var++); - err |= check_one_size_helper(sizeof(*obj), struct_size, obj, data, 0 + unconst); - err |= check_one_size_helper(sizeof(*obj) + sizeof(*obj->data), - struct_size, obj, data, 1 + unconst); - err |= check_one_size_helper(SIZE_MAX, - struct_size, obj, data, -3 + unconst); - err |= check_one_size_helper(SIZE_MAX, - struct_size, obj, data, SIZE_MAX - 3 + unconst); - - pr_info("%d overflow 
size helper tests finished\n", count); - - return err; + check_one_size_helper(sizeof(*obj) + (4 * sizeof(*obj->data)), + struct_size, obj, data, var++); + check_one_size_helper(sizeof(*obj) + (5 * sizeof(*obj->data)), + struct_size, obj, data, var++); + check_one_size_helper(sizeof(*obj), struct_size, obj, data, 0 + unconst); + check_one_size_helper(sizeof(*obj) + sizeof(*obj->data), + struct_size, obj, data, 1 + unconst); + check_one_size_helper(SIZE_MAX, + struct_size, obj, data, -3 + unconst); + check_one_size_helper(SIZE_MAX, + struct_size, obj, data, SIZE_MAX - 3 + unconst); + + kunit_info(test, "%d overflow size helper tests finished\n", count); +#undef check_one_size_helper } -static int __init test_module_init(void) -{ - int err = 0; - - err |= test_overflow_calculation(); - err |= test_overflow_shift(); - err |= test_overflow_size_helpers(); - err |= test_overflow_allocation(); - - if (err) { - pr_warn("FAIL!\n"); - err = -EINVAL; - } else { - pr_info("all tests passed\n"); - } +static struct kunit_case overflow_test_cases[] = { + KUNIT_CASE(u8_overflow_test), + KUNIT_CASE(s8_overflow_test), + KUNIT_CASE(u16_overflow_test), + KUNIT_CASE(s16_overflow_test), + KUNIT_CASE(u32_overflow_test), + KUNIT_CASE(s32_overflow_test), +#if BITS_PER_LONG == 64 + KUNIT_CASE(u64_overflow_test), + KUNIT_CASE(s64_overflow_test), +#endif + KUNIT_CASE(overflow_shift_test), + KUNIT_CASE(overflow_allocation_test), + KUNIT_CASE(overflow_size_helpers_test), + {} +}; - return err; -} +static struct kunit_suite overflow_test_suite = { + .name = "overflow", + .test_cases = overflow_test_cases, +}; -static void __exit test_module_exit(void) -{ } +kunit_test_suite(overflow_test_suite); -module_init(test_module_init); -module_exit(test_module_exit); MODULE_LICENSE("Dual MIT/GPL"); -- GitLab From e52432e164230929fe1f7b5a67bda0cc870f66d5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 19 Jan 2022 10:43:54 -0800 Subject: [PATCH 0690/1586] tpm: vtpm_proxy: Check length to avoid 
compiler warning When building with -Warray-bounds under GCC 11.2, this warning was emitted: In function 'memset', inlined from 'vtpm_proxy_fops_read' at drivers/char/tpm/tpm_vtpm_proxy.c:102:2: ./include/linux/fortify-string.h:43:33: warning: '__builtin_memset' pointer overflow between offset 164 and size [2147483648, 4294967295] [-Warray-bounds] 43 | #define __underlying_memset __builtin_memset | ^ This warning appears to be triggered due to the "count < len" check in vtpm_proxy_fops_read() splitting the CFG[1], and the compiler attempting to reason about the possible value range in len compared to the buffer size. In order to silence this warning, and to keep this code robust if the use of proxy_dev->req_len ever changes in the future, explicitly check the size of len before reaching the memset(). [1] https://lore.kernel.org/lkml/CAG48ez1iTF9KegKJrW5a3WzXgCPZJ73nS2_e5esKJRppdzvv8g@mail.gmail.com Cc: Peter Huewe Cc: Jarkko Sakkinen Cc: Jason Gunthorpe Cc: linux-integrity@vger.kernel.org Reviewed-by: Stefan Berger Link: https://lore.kernel.org/lkml/4b59d305-6858-1514-751a-37853ad777be@linux.ibm.com Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220119184354.3367603-1-keescook@chromium.org --- drivers/char/tpm/tpm_vtpm_proxy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm_vtpm_proxy.c b/drivers/char/tpm/tpm_vtpm_proxy.c index 91c772e38bb54..5c865987ba5c1 100644 --- a/drivers/char/tpm/tpm_vtpm_proxy.c +++ b/drivers/char/tpm/tpm_vtpm_proxy.c @@ -91,7 +91,7 @@ static ssize_t vtpm_proxy_fops_read(struct file *filp, char __user *buf, len = proxy_dev->req_len; - if (count < len) { + if (count < len || len > sizeof(proxy_dev->buffer)) { mutex_unlock(&proxy_dev->buf_lock); pr_debug("Invalid size in recv: count=%zd, req_len=%zd\n", count, len); -- GitLab From fad278388e01e3658a356118bed8ee2c2408d280 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 24 May 2021 21:15:15 -0700 Subject: [PATCH 0691/1586] media: omap3isp: 
Use struct_group() for memcpy() region In preparation for FORTIFY_SOURCE performing compile-time and run-time field bounds checking for memcpy(), memmove(), and memset(), avoid intentionally writing across neighboring fields. Wrap the target region in struct_group(). This additionally fixes a theoretical misalignment of the copy (since the size of "buf" changes between 64-bit and 32-bit, but this is likely never built for 64-bit). FWIW, I think this code is totally broken on 64-bit (which appears to not be a "real" build configuration): it would either always fail (with an uninitialized data->buf_size) or would cause corruption in userspace due to the copy_to_user() in the call path against an uninitialized data->buf value: omap3isp_stat_request_statistics_time32(...) struct omap3isp_stat_data data64; ... omap3isp_stat_request_statistics(stat, &data64); int omap3isp_stat_request_statistics(struct ispstat *stat, struct omap3isp_stat_data *data) ... buf = isp_stat_buf_get(stat, data); static struct ispstat_buffer *isp_stat_buf_get(struct ispstat *stat, struct omap3isp_stat_data *data) ... if (buf->buf_size > data->buf_size) { ... return ERR_PTR(-EINVAL); } ... rval = copy_to_user(data->buf, buf->virt_addr, buf->buf_size); Regardless, additionally initialize data64 to be zero-filled to avoid undefined behavior. Cc: Laurent Pinchart Cc: Mauro Carvalho Chehab Cc: Arnd Bergmann Cc: Sakari Ailus Cc: linux-media@vger.kernel.org Fixes: 378e3f81cb56 ("media: omap3isp: support 64-bit version of omap3isp_stat_data") Cc: stable@vger.kernel.org Reviewed-by: Gustavo A. R. 
Silva Link: https://lore.kernel.org/lkml/20211215220505.GB21862@embeddedor Signed-off-by: Kees Cook --- drivers/media/platform/omap3isp/ispstat.c | 5 +++-- include/uapi/linux/omap3isp.h | 21 +++++++++++++-------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/media/platform/omap3isp/ispstat.c b/drivers/media/platform/omap3isp/ispstat.c index 5b9b57f4d9bf8..68cf68dbcace2 100644 --- a/drivers/media/platform/omap3isp/ispstat.c +++ b/drivers/media/platform/omap3isp/ispstat.c @@ -512,7 +512,7 @@ int omap3isp_stat_request_statistics(struct ispstat *stat, int omap3isp_stat_request_statistics_time32(struct ispstat *stat, struct omap3isp_stat_data_time32 *data) { - struct omap3isp_stat_data data64; + struct omap3isp_stat_data data64 = { }; int ret; ret = omap3isp_stat_request_statistics(stat, &data64); @@ -521,7 +521,8 @@ int omap3isp_stat_request_statistics_time32(struct ispstat *stat, data->ts.tv_sec = data64.ts.tv_sec; data->ts.tv_usec = data64.ts.tv_usec; - memcpy(&data->buf, &data64.buf, sizeof(*data) - sizeof(data->ts)); + data->buf = (uintptr_t)data64.buf; + memcpy(&data->frame, &data64.frame, sizeof(data->frame)); return 0; } diff --git a/include/uapi/linux/omap3isp.h b/include/uapi/linux/omap3isp.h index 87b55755f4ffe..d9db7ad438908 100644 --- a/include/uapi/linux/omap3isp.h +++ b/include/uapi/linux/omap3isp.h @@ -162,6 +162,7 @@ struct omap3isp_h3a_aewb_config { * struct omap3isp_stat_data - Statistic data sent to or received from user * @ts: Timestamp of returned framestats. * @buf: Pointer to pass to user. + * @buf_size: Size of buffer. * @frame_number: Frame number of requested stats. * @cur_frame: Current frame number being processed. * @config_counter: Number of the configuration associated with the data. 
@@ -176,10 +177,12 @@ struct omap3isp_stat_data { struct timeval ts; #endif void __user *buf; - __u32 buf_size; - __u16 frame_number; - __u16 cur_frame; - __u16 config_counter; + __struct_group(/* no tag */, frame, /* no attrs */, + __u32 buf_size; + __u16 frame_number; + __u16 cur_frame; + __u16 config_counter; + ); }; #ifdef __KERNEL__ @@ -189,10 +192,12 @@ struct omap3isp_stat_data_time32 { __s32 tv_usec; } ts; __u32 buf; - __u32 buf_size; - __u16 frame_number; - __u16 cur_frame; - __u16 config_counter; + __struct_group(/* no tag */, frame, /* no attrs */, + __u32 buf_size; + __u16 frame_number; + __u16 cur_frame; + __u16 config_counter; + ); }; #endif -- GitLab From 451f0b6f4c44d7b649ae609157b114b71f6d7875 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 25 Feb 2022 19:14:40 +0100 Subject: [PATCH 0692/1586] block: default BLOCK_LEGACY_AUTOLOAD to y As Luis reported, losetup currently doesn't properly create the loop device without this if the device node already exists because old scripts created it manually. So default to y for now and remove the aggressive removal schedule. Reported-by: Luis Chamberlain Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220225181440.1351591-1-hch@lst.de Signed-off-by: Jens Axboe --- block/Kconfig | 8 +++----- block/bdev.c | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/block/Kconfig b/block/Kconfig index 168b873eb666d..7eb5d6d53b3fc 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -28,15 +28,13 @@ if BLOCK config BLOCK_LEGACY_AUTOLOAD bool "Legacy autoloading support" + default y help Enable loading modules and creating block device instances based on accesses through their device special file. This is a historic Linux feature and makes no sense in a udev world where device files are - created on demand. 
- - Say N here unless booting or other functionality broke without it, in - which case you should also send a report to your distribution and - linux-block@vger.kernel.org. + created on demand, but scripts that manually create device nodes and + then call losetup might rely on this behavior. config BLK_RQ_ALLOC_TIME bool diff --git a/block/bdev.c b/block/bdev.c index c687726445660..a3632317c8aae 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -738,7 +738,7 @@ struct block_device *blkdev_get_no_open(dev_t dev) inode = ilookup(blockdev_superblock, dev); if (inode) pr_warn_ratelimited( -"block device autoloading is deprecated. It will be removed in Linux 5.19\n"); +"block device autoloading is deprecated and will be removed.\n"); } if (!inode) return NULL; -- GitLab From 4a09a845c1773fe3cd96c2afe737b58a88e3d30b Mon Sep 17 00:00:00 2001 From: Gioh Kim Date: Fri, 14 Jan 2022 16:58:54 +0100 Subject: [PATCH 0693/1586] block/rnbd-clt: fix CHECK:BRACES warning This patch fixes the "CHECK:BRACES: braces {} should be used on all arms of this statement" warning from checkpatch. Signed-off-by: Gioh Kim Signed-off-by: Md Haris Iqbal Link: https://lore.kernel.org/r/20220114155855.984144-2-haris.iqbal@ionos.com Signed-off-by: Jens Axboe --- drivers/block/rnbd/rnbd-clt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index c08971de369fc..f0370a345c3e7 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -1262,9 +1262,9 @@ find_and_get_or_create_sess(const char *sessname, struct rtrs_clt_ops rtrs_ops; sess = find_or_create_sess(sessname, &first); - if (sess == ERR_PTR(-ENOMEM)) + if (sess == ERR_PTR(-ENOMEM)) { return ERR_PTR(-ENOMEM); - else if ((nr_poll_queues && !first) || (!nr_poll_queues && sess->nr_poll_queues)) { + } else if ((nr_poll_queues && !first) || (!nr_poll_queues && sess->nr_poll_queues)) { /* * A device MUST have its own session to use the polling-mode.
* It must fail to map new device with the same session. -- GitLab From 030ce8ba97d2e85e5310b1ae8236cd640bd384d0 Mon Sep 17 00:00:00 2001 From: Gioh Kim Date: Fri, 14 Jan 2022 16:58:55 +0100 Subject: [PATCH 0694/1586] block/rnbd: client device does not care queue/rotational On the client side, the device is a network device. There is no reason to set rotational even if the target device on the server is rotational. Signed-off-by: Gioh Kim Signed-off-by: Jack Wang Signed-off-by: Md Haris Iqbal Link: https://lore.kernel.org/r/20220114155855.984144-3-haris.iqbal@ionos.com Signed-off-by: Jens Axboe --- drivers/block/rnbd/rnbd-clt.c | 11 ++++++----- drivers/block/rnbd/rnbd-clt.h | 1 - drivers/block/rnbd/rnbd-proto.h | 4 ++-- drivers/block/rnbd/rnbd-srv.c | 1 - 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index f0370a345c3e7..9a880d559ab86 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -87,7 +87,6 @@ static int rnbd_clt_set_dev_attr(struct rnbd_clt_dev *dev, dev->discard_granularity = le32_to_cpu(rsp->discard_granularity); dev->discard_alignment = le32_to_cpu(rsp->discard_alignment); dev->secure_discard = le16_to_cpu(rsp->secure_discard); - dev->rotational = rsp->rotational; dev->wc = !!(rsp->cache_policy & RNBD_WRITEBACK); dev->fua = !!(rsp->cache_policy & RNBD_FUA); @@ -1410,8 +1409,10 @@ static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, int idx) dev->read_only = false; } - if (!dev->rotational) - blk_queue_flag_set(QUEUE_FLAG_NONROT, dev->queue); + /* + * Network device does not need rotational + */ + blk_queue_flag_set(QUEUE_FLAG_NONROT, dev->queue); err = add_disk(dev->gd); if (err) blk_cleanup_disk(dev->gd); @@ -1610,13 +1611,13 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname, } rnbd_clt_info(dev, - "map_device: Device mapped as %s (nsectors: %zu, logical_block_size: %d, physical_block_size: %d, max_write_same_sectors: %d,
max_discard_sectors: %d, discard_granularity: %d, discard_alignment: %d, secure_discard: %d, max_segments: %d, max_hw_sectors: %d, rotational: %d, wc: %d, fua: %d)\n", + "map_device: Device mapped as %s (nsectors: %zu, logical_block_size: %d, physical_block_size: %d, max_write_same_sectors: %d, max_discard_sectors: %d, discard_granularity: %d, discard_alignment: %d, secure_discard: %d, max_segments: %d, max_hw_sectors: %d, wc: %d, fua: %d)\n", dev->gd->disk_name, dev->nsectors, dev->logical_block_size, dev->physical_block_size, dev->max_write_same_sectors, dev->max_discard_sectors, dev->discard_granularity, dev->discard_alignment, dev->secure_discard, dev->max_segments, - dev->max_hw_sectors, dev->rotational, dev->wc, dev->fua); + dev->max_hw_sectors, dev->wc, dev->fua); mutex_unlock(&dev->lock); rnbd_clt_put_sess(sess); diff --git a/drivers/block/rnbd/rnbd-clt.h b/drivers/block/rnbd/rnbd-clt.h index 0c2cae7f39b9f..62bf7c3fa63c6 100644 --- a/drivers/block/rnbd/rnbd-clt.h +++ b/drivers/block/rnbd/rnbd-clt.h @@ -118,7 +118,6 @@ struct rnbd_clt_dev { enum rnbd_access_mode access_mode; u32 nr_poll_queues; bool read_only; - bool rotational; bool wc; bool fua; u32 max_hw_sectors; diff --git a/drivers/block/rnbd/rnbd-proto.h b/drivers/block/rnbd/rnbd-proto.h index de5d5a8df81d7..c4a68b3a1cbe8 100644 --- a/drivers/block/rnbd/rnbd-proto.h +++ b/drivers/block/rnbd/rnbd-proto.h @@ -128,7 +128,7 @@ enum rnbd_cache_policy { * @logical_block_size: logical block size device supports in bytes * @max_segments: max segments hardware support in one transfer * @secure_discard: supports secure discard - * @rotation: is a rotational disc? + * @obsolete_rotational: obsolete, not in used. * @cache_policy: support write-back caching or FUA? 
*/ struct rnbd_msg_open_rsp { @@ -144,7 +144,7 @@ struct rnbd_msg_open_rsp { __le16 logical_block_size; __le16 max_segments; __le16 secure_discard; - u8 rotational; + u8 obsolete_rotational; u8 cache_policy; u8 reserved[10]; }; diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c index 132e950685d59..6499efae5c43e 100644 --- a/drivers/block/rnbd/rnbd-srv.c +++ b/drivers/block/rnbd/rnbd-srv.c @@ -558,7 +558,6 @@ static void rnbd_srv_fill_msg_open_rsp(struct rnbd_msg_open_rsp *rsp, cpu_to_le32(rnbd_dev_get_discard_alignment(rnbd_dev)); rsp->secure_discard = cpu_to_le16(rnbd_dev_get_secure_discard(rnbd_dev)); - rsp->rotational = !blk_queue_nonrot(q); rsp->cache_policy = 0; if (test_bit(QUEUE_FLAG_WC, &q->queue_flags)) rsp->cache_policy |= RNBD_WRITEBACK; -- GitLab From 24afc15dbe218f860994f627b4ba1fb09225a298 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 7 Feb 2022 21:48:19 +0100 Subject: [PATCH 0695/1586] block/rnbd: Remove a useless mutex According to lib/idr.c, The IDA handles its own locking. It is safe to call any of the IDA functions without synchronisation in your code. So the 'ida_lock' mutex can just be removed. It is here only to protect some ida_simple_get()/ida_simple_remove() calls. While at it, switch to ida_alloc_XXX()/ida_free() instead of ida_simple_get()/ida_simple_remove(). The latter is deprecated and more verbose.
Signed-off-by: Christophe JAILLET Acked-by: Jack Wang Link: https://lore.kernel.org/r/7f9eccd8b1fce1bac45ac9b01a78cf72f54c0a61.1644266862.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jens Axboe --- drivers/block/rnbd/rnbd-clt.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index 9a880d559ab86..1f63f308eb394 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -23,7 +23,6 @@ MODULE_LICENSE("GPL"); static int rnbd_client_major; static DEFINE_IDA(index_ida); -static DEFINE_MUTEX(ida_lock); static DEFINE_MUTEX(sess_lock); static LIST_HEAD(sess_list); @@ -55,9 +54,7 @@ static void rnbd_clt_put_dev(struct rnbd_clt_dev *dev) if (!refcount_dec_and_test(&dev->refcount)) return; - mutex_lock(&ida_lock); - ida_simple_remove(&index_ida, dev->clt_device_id); - mutex_unlock(&ida_lock); + ida_free(&index_ida, dev->clt_device_id); kfree(dev->hw_queues); kfree(dev->pathname); rnbd_clt_put_sess(dev->sess); @@ -1460,10 +1457,8 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess, goto out_alloc; } - mutex_lock(&ida_lock); - ret = ida_simple_get(&index_ida, 0, 1 << (MINORBITS - RNBD_PART_BITS), - GFP_KERNEL); - mutex_unlock(&ida_lock); + ret = ida_alloc_max(&index_ida, 1 << (MINORBITS - RNBD_PART_BITS), + GFP_KERNEL); if (ret < 0) { pr_err("Failed to initialize device '%s' from session %s, allocating idr failed, err: %d\n", pathname, sess->sessname, ret); -- GitLab From d9a74051a73c4fbd065ae806b8da151cbded84f1 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 8 Feb 2022 11:46:56 +0000 Subject: [PATCH 0696/1586] loop: clean up grammar in warning message The phrase "has still" should be "still has" to clean up the grammar. 
Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20220208114656.61629-1-colin.i.king@gmail.com Signed-off-by: Jens Axboe --- drivers/block/loop.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 01cbbfc4e9e24..bdea448d24194 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1262,7 +1262,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) if (size_changed && lo->lo_device->bd_inode->i_mapping->nrpages) { /* If any pages were dirtied after invalidate_bdev(), try again */ err = -EAGAIN; - pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", + pr_warn("%s: loop%d (%s) still has dirty pages (nrpages=%lu)\n", __func__, lo->lo_number, lo->lo_file_name, lo->lo_device->bd_inode->i_mapping->nrpages); goto out_unfreeze; @@ -1482,7 +1482,7 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg) /* invalidate_bdev should have truncated all the pages */ if (lo->lo_device->bd_inode->i_mapping->nrpages) { err = -EAGAIN; - pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", + pr_warn("%s: loop%d (%s) still has dirty pages (nrpages=%lu)\n", __func__, lo->lo_number, lo->lo_file_name, lo->lo_device->bd_inode->i_mapping->nrpages); goto out_unfreeze; -- GitLab From a75110c3b36959d04d3a586ba43510ddf9b410b5 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 15 Feb 2022 03:59:51 -0800 Subject: [PATCH 0697/1586] null_blk: fix return value from null_add_dev() The function nullb_device_power_store() returns -ENOMEM when null_add_dev() fails. null_add_dev() can fail with a return value other than -ENOMEM, such as -EINVAL when the Zoned Block Device option is used, see: nullb_device_power_store() null_add_dev() null_init_zoned_dev() return -EINVAL; When trying to load the module, having -ENOMEM returned on the command line creates confusion when plenty of memory is free on the machine.
Instead of hardcoding -ENOMEM return the value of null_add_dev() function. Signed-off-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220215115951.15945-1-kch@nvidia.com Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 13004beb48cab..90b6bd2a114b2 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -431,9 +431,10 @@ static ssize_t nullb_device_power_store(struct config_item *item, if (!dev->power && newp) { if (test_and_set_bit(NULLB_DEV_FL_UP, &dev->flags)) return count; - if (null_add_dev(dev)) { + ret = null_add_dev(dev); + if (ret) { clear_bit(NULLB_DEV_FL_UP, &dev->flags); - return -ENOMEM; + return ret; } set_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags); -- GitLab From b27824d31f09ea7b4a6ba2c1b18bd328df3e8bed Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 15 Feb 2022 13:33:07 -0800 Subject: [PATCH 0698/1586] loop: use sysfs_emit() in the sysfs xxx show() sprintf does not know the PAGE_SIZE maximum of the temporary buffer used for outputting sysfs content and it's possible to overrun the PAGE_SIZE buffer length. Use a generic sysfs_emit function that knows the size of the temporary buffer and ensures that no overrun is done for offset attribute in loop_attr_[offset|sizelimit|autoclear|partscan|dio]_show() callbacks. 
Signed-off-by: Chaitanya Kulkarni Reviewed-by: Himanshu Madhani Link: https://lore.kernel.org/r/20220215213310.7264-2-kch@nvidia.com Signed-off-by: Jens Axboe --- drivers/block/loop.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index bdea448d24194..a55e5eda1d174 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -680,33 +680,33 @@ static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf) static ssize_t loop_attr_offset_show(struct loop_device *lo, char *buf) { - return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_offset); + return sysfs_emit(buf, "%llu\n", (unsigned long long)lo->lo_offset); } static ssize_t loop_attr_sizelimit_show(struct loop_device *lo, char *buf) { - return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit); + return sysfs_emit(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit); } static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf) { int autoclear = (lo->lo_flags & LO_FLAGS_AUTOCLEAR); - return sprintf(buf, "%s\n", autoclear ? "1" : "0"); + return sysfs_emit(buf, "%s\n", autoclear ? "1" : "0"); } static ssize_t loop_attr_partscan_show(struct loop_device *lo, char *buf) { int partscan = (lo->lo_flags & LO_FLAGS_PARTSCAN); - return sprintf(buf, "%s\n", partscan ? "1" : "0"); + return sysfs_emit(buf, "%s\n", partscan ? "1" : "0"); } static ssize_t loop_attr_dio_show(struct loop_device *lo, char *buf) { int dio = (lo->lo_flags & LO_FLAGS_DIRECT_IO); - return sprintf(buf, "%s\n", dio ? "1" : "0"); + return sysfs_emit(buf, "%s\n", dio ? "1" : "0"); } LOOP_ATTR_RO(backing_file); -- GitLab From 0aab29b85478b994422f1551ce36e5640b09db2b Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 15 Feb 2022 13:33:08 -0800 Subject: [PATCH 0699/1586] loop: remove extra variable in lo_fallocate() The local variable q is used to pass it to the blk_queue_discard(). 
We can get away with using lo->lo_queue instead of storing in a local variable which is not used anywhere else. No functional change in this patch. Signed-off-by: Chaitanya Kulkarni Reviewed-by: Himanshu Madhani Link: https://lore.kernel.org/r/20220215213310.7264-3-kch@nvidia.com Signed-off-by: Jens Axboe --- drivers/block/loop.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index a55e5eda1d174..77c61eaaa6e4e 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -308,12 +308,11 @@ static int lo_fallocate(struct loop_device *lo, struct request *rq, loff_t pos, * a.k.a. discard/zerorange. */ struct file *file = lo->lo_backing_file; - struct request_queue *q = lo->lo_queue; int ret; mode |= FALLOC_FL_KEEP_SIZE; - if (!blk_queue_discard(q)) { + if (!blk_queue_discard(lo->lo_queue)) { ret = -EOPNOTSUPP; goto out; } -- GitLab From 9c64e38cc639537ad9b542635af1733e8cb5e19b Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 15 Feb 2022 13:33:09 -0800 Subject: [PATCH 0700/1586] loop: remove extra variable in lo_req_flush The local variable file is used to pass it to the vfs_fsync(). We can get away with using lo->lo_backing_file instead of storing in a local variable which is not used anywhere else. No functional change in this patch. 
Signed-off-by: Chaitanya Kulkarni Reviewed-by: Himanshu Madhani Link: https://lore.kernel.org/r/20220215213310.7264-4-kch@nvidia.com Signed-off-by: Jens Axboe --- drivers/block/loop.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 77c61eaaa6e4e..18b30a56bfc47 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -326,8 +326,7 @@ static int lo_fallocate(struct loop_device *lo, struct request *rq, loff_t pos, static int lo_req_flush(struct loop_device *lo, struct request *rq) { - struct file *file = lo->lo_backing_file; - int ret = vfs_fsync(file, 0); + int ret = vfs_fsync(lo->lo_backing_file, 0); if (unlikely(ret && ret != -EINVAL)) ret = -EIO; -- GitLab From ef44c50837ab7818920bd9994b36d6e573312abc Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 15 Feb 2022 13:33:10 -0800 Subject: [PATCH 0701/1586] loop: allow user to set the queue depth Instead of hardcoding queue depth allow user to set the hw queue depth using module parameter. Set default value to 128 to retain the existing behavior. 
Signed-off-by: Chaitanya Kulkarni Reviewed-by: Himanshu Madhani Link: https://lore.kernel.org/r/20220215213310.7264-5-kch@nvidia.com Signed-off-by: Jens Axboe --- drivers/block/loop.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 18b30a56bfc47..8fb89d0624fc5 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -85,6 +85,7 @@ #include #define LOOP_IDLE_WORKER_TIMEOUT (60 * HZ) +#define LOOP_DEFAULT_HW_Q_DEPTH (128) static DEFINE_IDR(loop_index_idr); static DEFINE_MUTEX(loop_ctl_mutex); @@ -1785,6 +1786,24 @@ module_param(max_loop, int, 0444); MODULE_PARM_DESC(max_loop, "Maximum number of loop devices"); module_param(max_part, int, 0444); MODULE_PARM_DESC(max_part, "Maximum number of partitions per loop device"); + +static int hw_queue_depth = LOOP_DEFAULT_HW_Q_DEPTH; + +static int loop_set_hw_queue_depth(const char *s, const struct kernel_param *p) +{ + int ret = kstrtoint(s, 10, &hw_queue_depth); + + return (ret || (hw_queue_depth < 1)) ? -EINVAL : 0; +} + +static const struct kernel_param_ops loop_hw_qdepth_param_ops = { + .set = loop_set_hw_queue_depth, + .get = param_get_int, +}; + +device_param_cb(hw_queue_depth, &loop_hw_qdepth_param_ops, &hw_queue_depth, 0444); +MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. 
Default: 128"); + MODULE_LICENSE("GPL"); MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR); @@ -1979,7 +1998,7 @@ static int loop_add(int i) lo->tag_set.ops = &loop_mq_ops; lo->tag_set.nr_hw_queues = 1; - lo->tag_set.queue_depth = 128; + lo->tag_set.queue_depth = hw_queue_depth; lo->tag_set.numa_node = NUMA_NO_NODE; lo->tag_set.cmd_size = sizeof(struct loop_cmd); lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_STACKING | -- GitLab From 3d3472f3ed419fe1a9d3f881a4905fa8e03d750c Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 16 Feb 2022 09:29:45 -0800 Subject: [PATCH 0702/1586] null_blk: remove hardcoded alloc_cmd() parameter The only caller of alloc_cmd(), null_submit_bio(), unconditionally sets the second parameter to true, so that value is statically hard-coded in null_blk. There is no point in having a statically hardcoded function parameter. Remove the unnecessary parameter can_wait and adjust the code so it can retain the existing behavior of waiting when we don't get a valid nullb_cmd from __alloc_cmd() in alloc_cmd(). The restructured code avoids multiple return statements, multiple calls to __alloc_cmd(), and a resulting fast path call to prepare_to_wait() due to removal of the first alloc_cmd() call. Follow the pattern that we have in bio_alloc() to set the structure members in the structure allocation function in alloc_cmd() and pass bio to initialize the newly allocated cmd->bio member. Follow the pattern in copy_to_nullb() of using the result of one function call (null_cache_active()) as a parameter to another function call (null_insert_page()): use the result of alloc_cmd() as the first parameter to null_handle_cmd() in the null_submit_bio() function. This allows us to remove the local variable cmd on the stack in null_submit_bio(), which is in the fast path.
Signed-off-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20220216172945.31124-2-kch@nvidia.com Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 90b6bd2a114b2..29e183719e773 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -720,26 +720,25 @@ static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq) return NULL; } -static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait) +static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, struct bio *bio) { struct nullb_cmd *cmd; DEFINE_WAIT(wait); - cmd = __alloc_cmd(nq); - if (cmd || !can_wait) - return cmd; - do { - prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE); + /* + * This avoids multiple return statements, multiple calls to + * __alloc_cmd() and a fast path call to prepare_to_wait(). 
+ */ cmd = __alloc_cmd(nq); - if (cmd) - break; - + if (cmd) { + cmd->bio = bio; + return cmd; + } + prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE); io_schedule(); + finish_wait(&nq->wait, &wait); } while (1); - - finish_wait(&nq->wait, &wait); - return cmd; } static void end_cmd(struct nullb_cmd *cmd) @@ -1477,12 +1476,8 @@ static void null_submit_bio(struct bio *bio) sector_t nr_sectors = bio_sectors(bio); struct nullb *nullb = bio->bi_bdev->bd_disk->private_data; struct nullb_queue *nq = nullb_to_queue(nullb); - struct nullb_cmd *cmd; - - cmd = alloc_cmd(nq, 1); - cmd->bio = bio; - null_handle_cmd(cmd, sector, nr_sectors, bio_op(bio)); + null_handle_cmd(alloc_cmd(nq, bio), sector, nr_sectors, bio_op(bio)); } static bool should_timeout_request(struct request *rq) -- GitLab From c90b6b50b42dfdfeb27b02e670e812b69ff364f5 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 22 Feb 2022 07:28:51 -0800 Subject: [PATCH 0703/1586] null_blk: remove hardcoded null_alloc_page() param Only caller of null_alloc_page() is null_insert_page() unconditionally sets only parameter to GFP_NOIO and that is statically hard-coded in null_blk. There is no point in having statically hardcoded function parameter. Remove the unnecessary parameter gfp_flags and adjust the code, so it can retain existing behavior null_alloc_page() with GFP_NOIO. 
Signed-off-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220222152852.26043-2-kch@nvidia.com Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 29e183719e773..80f9a6ba376d5 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -777,15 +777,15 @@ static void null_complete_rq(struct request *rq) end_cmd(blk_mq_rq_to_pdu(rq)); } -static struct nullb_page *null_alloc_page(gfp_t gfp_flags) +static struct nullb_page *null_alloc_page(void) { struct nullb_page *t_page; - t_page = kmalloc(sizeof(struct nullb_page), gfp_flags); + t_page = kmalloc(sizeof(struct nullb_page), GFP_NOIO); if (!t_page) goto out; - t_page->page = alloc_pages(gfp_flags, 0); + t_page->page = alloc_pages(GFP_NOIO, 0); if (!t_page->page) goto out_freepage; @@ -932,7 +932,7 @@ static struct nullb_page *null_insert_page(struct nullb *nullb, spin_unlock_irq(&nullb->lock); - t_page = null_alloc_page(GFP_NOIO); + t_page = null_alloc_page(); if (!t_page) goto out_lock; -- GitLab From df00b1d26c3c3ff9dae4b572a6ad878ab65334e1 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 22 Feb 2022 07:28:52 -0800 Subject: [PATCH 0704/1586] null_blk: null_alloc_page() cleanup Remove goto labels and use direct returns as error unwinding code only needs to free t_page variable if we alloc_pages() call fails as having two labels for one kfree() can be avoided easily. 
Signed-off-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220222152852.26043-3-kch@nvidia.com Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 80f9a6ba376d5..05b1120e66234 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -783,18 +783,16 @@ static struct nullb_page *null_alloc_page(void) t_page = kmalloc(sizeof(struct nullb_page), GFP_NOIO); if (!t_page) - goto out; + return NULL; t_page->page = alloc_pages(GFP_NOIO, 0); - if (!t_page->page) - goto out_freepage; + if (!t_page->page) { + kfree(t_page); + return NULL; + } memset(t_page->bitmap, 0, sizeof(t_page->bitmap)); return t_page; -out_freepage: - kfree(t_page); -out: - return NULL; } static void null_free_page(struct nullb_page *t_page) -- GitLab From 77c436de01c0f29ba0b745196160184b22adaf60 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 22 Feb 2022 16:46:32 +0100 Subject: [PATCH 0705/1586] mpage: pass the operation to bio_alloc Refactor the mpage read/write page code to pass the op to bio_alloc instead of setting it just before the submission. 
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220222154634.597067-2-hch@lst.de Signed-off-by: Jens Axboe --- fs/mpage.c | 50 +++++++++++++++++++++----------------------------- 1 file changed, 21 insertions(+), 29 deletions(-) diff --git a/fs/mpage.c b/fs/mpage.c index dbfc02e23d97f..6c4b810a21d0a 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -57,10 +57,9 @@ static void mpage_end_io(struct bio *bio) bio_put(bio); } -static struct bio *mpage_bio_submit(int op, int op_flags, struct bio *bio) +static struct bio *mpage_bio_submit(struct bio *bio) { bio->bi_end_io = mpage_end_io; - bio_set_op_attrs(bio, op, op_flags); guard_bio_eod(bio); submit_bio(bio); return NULL; @@ -146,16 +145,15 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args) struct block_device *bdev = NULL; int length; int fully_mapped = 1; - int op_flags; + int op = REQ_OP_READ; unsigned nblocks; unsigned relative_block; gfp_t gfp; if (args->is_readahead) { - op_flags = REQ_RAHEAD; + op |= REQ_RAHEAD; gfp = readahead_gfp_mask(page->mapping); } else { - op_flags = 0; gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL); } @@ -264,7 +262,7 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args) * This page will go to BIO. Do we need to send this BIO off first? 
*/ if (args->bio && (args->last_block_in_bio != blocks[0] - 1)) - args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio); + args->bio = mpage_bio_submit(args->bio); alloc_new: if (args->bio == NULL) { @@ -273,7 +271,7 @@ alloc_new: page)) goto out; } - args->bio = bio_alloc(bdev, bio_max_segs(args->nr_pages), 0, + args->bio = bio_alloc(bdev, bio_max_segs(args->nr_pages), op, gfp); if (args->bio == NULL) goto confused; @@ -282,7 +280,7 @@ alloc_new: length = first_hole << blkbits; if (bio_add_page(args->bio, page, length, 0) < length) { - args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio); + args->bio = mpage_bio_submit(args->bio); goto alloc_new; } @@ -290,7 +288,7 @@ alloc_new: nblocks = map_bh->b_size >> blkbits; if ((buffer_boundary(map_bh) && relative_block == nblocks) || (first_hole != blocks_per_page)) - args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio); + args->bio = mpage_bio_submit(args->bio); else args->last_block_in_bio = blocks[blocks_per_page - 1]; out: @@ -298,7 +296,7 @@ out: confused: if (args->bio) - args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio); + args->bio = mpage_bio_submit(args->bio); if (!PageUptodate(page)) block_read_full_page(page, args->get_block); else @@ -361,7 +359,7 @@ void mpage_readahead(struct readahead_control *rac, get_block_t get_block) put_page(page); } if (args.bio) - mpage_bio_submit(REQ_OP_READ, REQ_RAHEAD, args.bio); + mpage_bio_submit(args.bio); } EXPORT_SYMBOL(mpage_readahead); @@ -378,7 +376,7 @@ int mpage_readpage(struct page *page, get_block_t get_block) args.bio = do_mpage_readpage(&args); if (args.bio) - mpage_bio_submit(REQ_OP_READ, 0, args.bio); + mpage_bio_submit(args.bio); return 0; } EXPORT_SYMBOL(mpage_readpage); @@ -469,7 +467,6 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc, struct buffer_head map_bh; loff_t i_size = i_size_read(inode); int ret = 0; - int op_flags = wbc_to_write_flags(wbc); if (page_has_buffers(page)) { struct 
buffer_head *head = page_buffers(page); @@ -577,7 +574,7 @@ page_is_mapped: * This page will go to BIO. Do we need to send this BIO off first? */ if (bio && mpd->last_block_in_bio != blocks[0] - 1) - bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio); + bio = mpage_bio_submit(bio); alloc_new: if (bio == NULL) { @@ -586,9 +583,10 @@ alloc_new: page, wbc)) goto out; } - bio = bio_alloc(bdev, BIO_MAX_VECS, 0, GFP_NOFS); + bio = bio_alloc(bdev, BIO_MAX_VECS, + REQ_OP_WRITE | wbc_to_write_flags(wbc), + GFP_NOFS); bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9); - wbc_init_bio(wbc, bio); bio->bi_write_hint = inode->i_write_hint; } @@ -601,7 +599,7 @@ alloc_new: wbc_account_cgroup_owner(wbc, page, PAGE_SIZE); length = first_unmapped << blkbits; if (bio_add_page(bio, page, length, 0) < length) { - bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio); + bio = mpage_bio_submit(bio); goto alloc_new; } @@ -611,7 +609,7 @@ alloc_new: set_page_writeback(page); unlock_page(page); if (boundary || (first_unmapped != blocks_per_page)) { - bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio); + bio = mpage_bio_submit(bio); if (boundary_block) { write_boundary_block(boundary_bdev, boundary_block, 1 << blkbits); @@ -623,7 +621,7 @@ alloc_new: confused: if (bio) - bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio); + bio = mpage_bio_submit(bio); if (mpd->use_writepage) { ret = mapping->a_ops->writepage(page, wbc); @@ -679,11 +677,8 @@ mpage_writepages(struct address_space *mapping, }; ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd); - if (mpd.bio) { - int op_flags = (wbc->sync_mode == WB_SYNC_ALL ? - REQ_SYNC : 0); - mpage_bio_submit(REQ_OP_WRITE, op_flags, mpd.bio); - } + if (mpd.bio) + mpage_bio_submit(mpd.bio); } blk_finish_plug(&plug); return ret; @@ -700,11 +695,8 @@ int mpage_writepage(struct page *page, get_block_t get_block, .use_writepage = 0, }; int ret = __mpage_writepage(page, wbc, &mpd); - if (mpd.bio) { - int op_flags = (wbc->sync_mode == WB_SYNC_ALL ? 
- REQ_SYNC : 0); - mpage_bio_submit(REQ_OP_WRITE, op_flags, mpd.bio); - } + if (mpd.bio) + mpage_bio_submit(mpd.bio); return ret; } EXPORT_SYMBOL(mpage_writepage); -- GitLab From 4c4dad11ff85e25b64dd62adee63463e40cc596b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 22 Feb 2022 16:46:33 +0100 Subject: [PATCH 0706/1586] ext4: pass the operation to bio_alloc Refactor the readpage code to pass the op to bio_alloc instead of setting it just before the submission. Signed-off-by: Christoph Hellwig Acked-by: Theodore Ts'o Link: https://lore.kernel.org/r/20220222154634.597067-3-hch@lst.de Signed-off-by: Jens Axboe --- fs/ext4/page-io.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 1253982268730..35ada7baf41e5 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -372,10 +372,9 @@ void ext4_io_submit(struct ext4_io_submit *io) struct bio *bio = io->io_bio; if (bio) { - int io_op_flags = io->io_wbc->sync_mode == WB_SYNC_ALL ? - REQ_SYNC : 0; + if (io->io_wbc->sync_mode == WB_SYNC_ALL) + io->io_bio->bi_opf |= REQ_SYNC; io->io_bio->bi_write_hint = io->io_end->inode->i_write_hint; - bio_set_op_attrs(io->io_bio, REQ_OP_WRITE, io_op_flags); submit_bio(io->io_bio); } io->io_bio = NULL; @@ -398,7 +397,7 @@ static void io_submit_init_bio(struct ext4_io_submit *io, * bio_alloc will _always_ be able to allocate a bio if * __GFP_DIRECT_RECLAIM is set, see comments for bio_alloc_bioset(). 
*/ - bio = bio_alloc(bh->b_bdev, BIO_MAX_VECS, 0, GFP_NOIO); + bio = bio_alloc(bh->b_bdev, BIO_MAX_VECS, REQ_OP_WRITE, GFP_NOIO); fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO); bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); bio->bi_end_io = ext4_end_bio; -- GitLab From fbe7c2ef5e1d1c218190c059432cdc87043c1275 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 22 Feb 2022 16:46:34 +0100 Subject: [PATCH 0707/1586] nilfs2: pass the operation to bio_alloc Refactor the segbuf write code to pass the op to bio_alloc instead of setting it just before the submission. Signed-off-by: Christoph Hellwig Acked-by: Ryusuke Konishi Link: https://lore.kernel.org/r/20220222154634.597067-4-hch@lst.de Signed-off-by: Jens Axboe --- fs/nilfs2/segbuf.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 4f71faacd8253..a3bb0c856ec80 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -337,8 +337,7 @@ static void nilfs_end_bio_write(struct bio *bio) } static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf, - struct nilfs_write_info *wi, int mode, - int mode_flags) + struct nilfs_write_info *wi) { struct bio *bio = wi->bio; int err; @@ -356,7 +355,6 @@ static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf, bio->bi_end_io = nilfs_end_bio_write; bio->bi_private = segbuf; - bio_set_op_attrs(bio, mode, mode_flags); submit_bio(bio); segbuf->sb_nbio++; @@ -384,15 +382,15 @@ static void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf, static int nilfs_segbuf_submit_bh(struct nilfs_segment_buffer *segbuf, struct nilfs_write_info *wi, - struct buffer_head *bh, int mode) + struct buffer_head *bh) { int len, err; BUG_ON(wi->nr_vecs <= 0); repeat: if (!wi->bio) { - wi->bio = bio_alloc(wi->nilfs->ns_bdev, wi->nr_vecs, 0, - GFP_NOIO); + wi->bio = bio_alloc(wi->nilfs->ns_bdev, wi->nr_vecs, + REQ_OP_WRITE, GFP_NOIO); wi->bio->bi_iter.bi_sector = 
(wi->blocknr + wi->end) << (wi->nilfs->ns_blocksize_bits - 9); } @@ -403,7 +401,7 @@ static int nilfs_segbuf_submit_bh(struct nilfs_segment_buffer *segbuf, return 0; } /* bio is FULL */ - err = nilfs_segbuf_submit_bio(segbuf, wi, mode, 0); + err = nilfs_segbuf_submit_bio(segbuf, wi); /* never submit current bh */ if (likely(!err)) goto repeat; @@ -433,13 +431,13 @@ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, nilfs_segbuf_prepare_write(segbuf, &wi); list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { - res = nilfs_segbuf_submit_bh(segbuf, &wi, bh, REQ_OP_WRITE); + res = nilfs_segbuf_submit_bh(segbuf, &wi, bh); if (unlikely(res)) goto failed_bio; } list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - res = nilfs_segbuf_submit_bh(segbuf, &wi, bh, REQ_OP_WRITE); + res = nilfs_segbuf_submit_bh(segbuf, &wi, bh); if (unlikely(res)) goto failed_bio; } @@ -449,8 +447,8 @@ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, * Last BIO is always sent through the following * submission. 
*/ - res = nilfs_segbuf_submit_bio(segbuf, &wi, REQ_OP_WRITE, - REQ_SYNC); + wi.bio->bi_opf |= REQ_SYNC; + res = nilfs_segbuf_submit_bio(segbuf, &wi); } failed_bio: -- GitLab From 483546c11d702fd6f74c8c3f8123b7672def10b2 Mon Sep 17 00:00:00 2001 From: Nian Yanchuan Date: Mon, 28 Feb 2022 01:01:24 +0800 Subject: [PATCH 0708/1586] block: remove redundant semicolon Remove redundant semicolon from block/bdev.c Signed-off-by: Nian Yanchuan Link: https://lore.kernel.org/r/20220227170124.GA14658@localhost.localdomain Signed-off-by: Jens Axboe --- block/bdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/bdev.c b/block/bdev.c index a3632317c8aae..ce8de42a89bea 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -678,7 +678,7 @@ static int blkdev_get_whole(struct block_device *bdev, fmode_t mode) if (test_bit(GD_NEED_PART_SCAN, &disk->state)) bdev_disk_changed(disk, false); bdev->bd_openers++; - return 0;; + return 0; } static void blkdev_put_whole(struct block_device *bdev, fmode_t mode) -- GitLab From 686d303ee6301261b422ea51e64833d7909a2c36 Mon Sep 17 00:00:00 2001 From: Patrick Rudolph Date: Fri, 25 Feb 2022 17:06:09 +0100 Subject: [PATCH 0709/1586] hwmon: (pmbus) Add mutex to regulator ops On PMBUS devices with multiple pages, the regulator ops need to be protected with the update mutex. This prevents accidentally changing the page in a separate thread while operating on the PMBUS_OPERATION register. Tested on Infineon xdpe11280 while a separate thread polls for sensor data. 
Signed-off-by: Patrick Rudolph Signed-off-by: Marcello Sylvester Bauer Link: https://lore.kernel.org/r/b991506bcbf665f7af185945f70bf9d5cf04637c.1645804976.git.sylv@sylv.io Fixes: ddbb4db4ced1b ("hwmon: (pmbus) Add regulator support") Cc: Alan Tull Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/pmbus_core.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index ac2fbee1ba9c0..b1386a4df4cc4 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -2391,10 +2391,14 @@ static int pmbus_regulator_is_enabled(struct regulator_dev *rdev) { struct device *dev = rdev_get_dev(rdev); struct i2c_client *client = to_i2c_client(dev->parent); + struct pmbus_data *data = i2c_get_clientdata(client); u8 page = rdev_get_id(rdev); int ret; + mutex_lock(&data->update_lock); ret = pmbus_read_byte_data(client, page, PMBUS_OPERATION); + mutex_unlock(&data->update_lock); + if (ret < 0) return ret; @@ -2405,11 +2409,17 @@ static int _pmbus_regulator_on_off(struct regulator_dev *rdev, bool enable) { struct device *dev = rdev_get_dev(rdev); struct i2c_client *client = to_i2c_client(dev->parent); + struct pmbus_data *data = i2c_get_clientdata(client); u8 page = rdev_get_id(rdev); + int ret; - return pmbus_update_byte_data(client, page, PMBUS_OPERATION, - PB_OPERATION_CONTROL_ON, - enable ? PB_OPERATION_CONTROL_ON : 0); + mutex_lock(&data->update_lock); + ret = pmbus_update_byte_data(client, page, PMBUS_OPERATION, + PB_OPERATION_CONTROL_ON, + enable ? 
PB_OPERATION_CONTROL_ON : 0); + mutex_unlock(&data->update_lock); + + return ret; } static int pmbus_regulator_enable(struct regulator_dev *rdev) -- GitLab From 7f3cc8f897634b7e2d79bc2b7105e9ae6eaf4ac2 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 10 Jan 2022 14:56:25 -0800 Subject: [PATCH 0710/1586] hwmon: Report attribute name with udev events Up to now udev events only report the affected hwmon device if an alert is reported. This requires userspace to read all attributes if it wants to know what triggered the event. Provide the attribute name with the NAME property to help userspace find the attribute causing the event. Signed-off-by: Guenter Roeck --- drivers/hwmon/hwmon.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c index 3ae961986fc31..0b79c4e1bf99f 100644 --- a/drivers/hwmon/hwmon.c +++ b/drivers/hwmon/hwmon.c @@ -625,7 +625,9 @@ static const int __templates_size[] = { int hwmon_notify_event(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel) { + char event[MAX_SYSFS_ATTR_NAME_LENGTH + 5]; char sattr[MAX_SYSFS_ATTR_NAME_LENGTH]; + char *envp[] = { event, NULL }; const char * const *templates; const char *template; int base; @@ -641,8 +643,9 @@ int hwmon_notify_event(struct device *dev, enum hwmon_sensor_types type, base = hwmon_attr_base(type); scnprintf(sattr, MAX_SYSFS_ATTR_NAME_LENGTH, template, base + channel); + scnprintf(event, sizeof(event), "NAME=%s", sattr); sysfs_notify(&dev->kobj, NULL, sattr); - kobject_uevent(&dev->kobj, KOBJ_CHANGE); + kobject_uevent_env(&dev->kobj, KOBJ_CHANGE, envp); if (type == hwmon_temp) hwmon_thermal_notify(dev, channel); -- GitLab From 7c68c2c761d157203b64ebbb61dd7b5b6c32df61 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 22 Dec 2021 11:42:38 -0800 Subject: [PATCH 0711/1586] hwmon: (lm83) Reorder include files to be in alphabetic order Reorder include files to be in alphabetic order to simplify driver 
maintenance. Signed-off-by: Guenter Roeck --- drivers/hwmon/lm83.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/hwmon/lm83.c b/drivers/hwmon/lm83.c index 74fd7aa373a3e..44d720af24731 100644 --- a/drivers/hwmon/lm83.c +++ b/drivers/hwmon/lm83.c @@ -18,15 +18,15 @@ * http://www.national.com/pf/LM/LM82.html */ -#include +#include +#include #include -#include #include -#include -#include #include -#include +#include +#include #include +#include #include /* -- GitLab From 11e3377b9a439a5cf989bdb2b16d90e237542ec2 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 22 Dec 2021 11:49:04 -0800 Subject: [PATCH 0712/1586] hwmon: (lm83) Move lm83_id to avoid forward declaration There is no need to keep lm83_id at the end of the driver. Move it forward to where it is needed to avoid a forward declaration. Signed-off-by: Guenter Roeck --- drivers/hwmon/lm83.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/hwmon/lm83.c b/drivers/hwmon/lm83.c index 44d720af24731..2bb4bceef551a 100644 --- a/drivers/hwmon/lm83.c +++ b/drivers/hwmon/lm83.c @@ -317,7 +317,12 @@ static int lm83_detect(struct i2c_client *new_client, return 0; } -static const struct i2c_device_id lm83_id[]; +static const struct i2c_device_id lm83_id[] = { + { "lm83", lm83 }, + { "lm82", lm82 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, lm83_id); static int lm83_probe(struct i2c_client *new_client) { @@ -352,13 +357,6 @@ static int lm83_probe(struct i2c_client *new_client) * Driver data (common to all clients) */ -static const struct i2c_device_id lm83_id[] = { - { "lm83", lm83 }, - { "lm82", lm82 }, - { } -}; -MODULE_DEVICE_TABLE(i2c, lm83_id); - static struct i2c_driver lm83_driver = { .class = I2C_CLASS_HWMON, .driver = { -- GitLab From 81de0eea2bbc1b1334d40c4bb420219b603b4c45 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 22 Dec 2021 16:23:31 -0800 Subject: [PATCH 0713/1586] hwmon: (lm83) Replace new_client with client It has no 
value to name a variable 'new_client' in probe and detect functions; it is obvious that the client is new. Use 'client' as variable name instead. Signed-off-by: Guenter Roeck --- drivers/hwmon/lm83.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/hwmon/lm83.c b/drivers/hwmon/lm83.c index 2bb4bceef551a..fdd89cc481fa2 100644 --- a/drivers/hwmon/lm83.c +++ b/drivers/hwmon/lm83.c @@ -272,10 +272,10 @@ static const struct attribute_group lm83_group_opt = { */ /* Return 0 if detection is successful, -ENODEV otherwise */ -static int lm83_detect(struct i2c_client *new_client, +static int lm83_detect(struct i2c_client *client, struct i2c_board_info *info) { - struct i2c_adapter *adapter = new_client->adapter; + struct i2c_adapter *adapter = client->adapter; const char *name; u8 man_id, chip_id; @@ -283,20 +283,20 @@ static int lm83_detect(struct i2c_client *new_client, return -ENODEV; /* Detection */ - if ((i2c_smbus_read_byte_data(new_client, LM83_REG_R_STATUS1) & 0xA8) || - (i2c_smbus_read_byte_data(new_client, LM83_REG_R_STATUS2) & 0x48) || - (i2c_smbus_read_byte_data(new_client, LM83_REG_R_CONFIG) & 0x41)) { + if ((i2c_smbus_read_byte_data(client, LM83_REG_R_STATUS1) & 0xA8) || + (i2c_smbus_read_byte_data(client, LM83_REG_R_STATUS2) & 0x48) || + (i2c_smbus_read_byte_data(client, LM83_REG_R_CONFIG) & 0x41)) { dev_dbg(&adapter->dev, "LM83 detection failed at 0x%02x\n", - new_client->addr); + client->addr); return -ENODEV; } /* Identification */ - man_id = i2c_smbus_read_byte_data(new_client, LM83_REG_R_MAN_ID); + man_id = i2c_smbus_read_byte_data(client, LM83_REG_R_MAN_ID); if (man_id != 0x01) /* National Semiconductor */ return -ENODEV; - chip_id = i2c_smbus_read_byte_data(new_client, LM83_REG_R_CHIP_ID); + chip_id = i2c_smbus_read_byte_data(client, LM83_REG_R_CHIP_ID); switch (chip_id) { case 0x03: name = "lm83"; @@ -324,17 +324,17 @@ static const struct i2c_device_id lm83_id[] = { }; MODULE_DEVICE_TABLE(i2c, 
lm83_id); -static int lm83_probe(struct i2c_client *new_client) +static int lm83_probe(struct i2c_client *client) { struct device *hwmon_dev; struct lm83_data *data; - data = devm_kzalloc(&new_client->dev, sizeof(struct lm83_data), + data = devm_kzalloc(&client->dev, sizeof(struct lm83_data), GFP_KERNEL); if (!data) return -ENOMEM; - data->client = new_client; + data->client = client; mutex_init(&data->update_lock); /* @@ -344,11 +344,11 @@ static int lm83_probe(struct i2c_client *new_client) * declare 1 and 3 common, and then 2 and 4 only for the LM83. */ data->groups[0] = &lm83_group; - if (i2c_match_id(lm83_id, new_client)->driver_data == lm83) + if (i2c_match_id(lm83_id, client)->driver_data == lm83) data->groups[1] = &lm83_group_opt; - hwmon_dev = devm_hwmon_device_register_with_groups(&new_client->dev, - new_client->name, + hwmon_dev = devm_hwmon_device_register_with_groups(&client->dev, + client->name, data, data->groups); return PTR_ERR_OR_ZERO(hwmon_dev); } -- GitLab From 719af4f1a40bdf46cab7e4db216af7084a70897b Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 22 Dec 2021 16:22:00 -0800 Subject: [PATCH 0714/1586] hwmon: (lm83) Use regmap Using local caching in this driver had few benefits. It used cached values for two seconds and then re-read all registers from the chip even if the user only accessed a single attribute. On top of that, alarm attributes were stale for up to four seconds (the first status register read reports and clears an alarm, the second reports it cleared). Use regmap instead for caching. Do not re-read non-volatile registers, and do not cache volatile registers. As part of this change, handle register read and write address differences in regmap code. This is necessary to avoid problems with caching in the regmap core, and ultimately simplifies the code. Also, errors observed when reading from and writing to registers are no longer ignored. 
Signed-off-by: Guenter Roeck --- drivers/hwmon/Kconfig | 1 + drivers/hwmon/lm83.c | 176 ++++++++++++++++++++++++++---------------- 2 files changed, 112 insertions(+), 65 deletions(-) diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 8df25f1079bac..01ab80a2cc4a1 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -1288,6 +1288,7 @@ config SENSORS_LM80 config SENSORS_LM83 tristate "National Semiconductor LM83 and compatibles" depends on I2C + select REGMAP help If you say yes here you get support for National Semiconductor LM82 and LM83 sensor chips. diff --git a/drivers/hwmon/lm83.c b/drivers/hwmon/lm83.c index fdd89cc481fa2..c9605957e400e 100644 --- a/drivers/hwmon/lm83.c +++ b/drivers/hwmon/lm83.c @@ -21,11 +21,11 @@ #include #include #include -#include #include #include #include #include +#include #include #include @@ -77,7 +77,7 @@ enum chips { lm83, lm82 }; (val) < 0 ? ((val) - 500) / 1000 : \ ((val) + 500) / 1000) -static const u8 LM83_REG_R_TEMP[] = { +static const u8 LM83_REG_TEMP[] = { LM83_REG_R_LOCAL_TEMP, LM83_REG_R_REMOTE1_TEMP, LM83_REG_R_REMOTE2_TEMP, @@ -89,62 +89,82 @@ static const u8 LM83_REG_R_TEMP[] = { LM83_REG_R_TCRIT, }; -static const u8 LM83_REG_W_HIGH[] = { - LM83_REG_W_LOCAL_HIGH, - LM83_REG_W_REMOTE1_HIGH, - LM83_REG_W_REMOTE2_HIGH, - LM83_REG_W_REMOTE3_HIGH, - LM83_REG_W_TCRIT, -}; - /* * Client data (each client gets its own) */ struct lm83_data { - struct i2c_client *client; + struct regmap *regmap; const struct attribute_group *groups[3]; - struct mutex update_lock; - bool valid; /* false until following fields are valid */ - unsigned long last_updated; /* in jiffies */ - - /* registers values */ - s8 temp[9]; /* 0..3: input 1-4, - 4..7: high limit 1-4, - 8 : critical limit */ - u16 alarms; /* bitvector, combined */ }; -static struct lm83_data *lm83_update_device(struct device *dev) +/* regmap code */ + +static int lm83_regmap_reg_read(void *context, unsigned int reg, unsigned int *val) { - struct lm83_data 
*data = dev_get_drvdata(dev); - struct i2c_client *client = data->client; - - mutex_lock(&data->update_lock); - - if (time_after(jiffies, data->last_updated + HZ * 2) || !data->valid) { - int nr; - - dev_dbg(&client->dev, "Updating lm83 data.\n"); - for (nr = 0; nr < 9; nr++) { - data->temp[nr] = - i2c_smbus_read_byte_data(client, - LM83_REG_R_TEMP[nr]); - } - data->alarms = - i2c_smbus_read_byte_data(client, LM83_REG_R_STATUS1) - + (i2c_smbus_read_byte_data(client, LM83_REG_R_STATUS2) - << 8); - - data->last_updated = jiffies; - data->valid = true; + struct i2c_client *client = context; + int ret; + + ret = i2c_smbus_read_byte_data(client, reg); + if (ret < 0) + return ret; + + *val = ret; + return 0; +} + +/* + * The regmap write function maps read register addresses to write register + * addresses. This is necessary for regmap register caching to work. + * An alternative would be to clear the regmap cache whenever a register is + * written, but that would be much more expensive. + */ +static int lm83_regmap_reg_write(void *context, unsigned int reg, unsigned int val) +{ + struct i2c_client *client = context; + + switch (reg) { + case LM83_REG_R_CONFIG: + case LM83_REG_R_LOCAL_HIGH: + case LM83_REG_R_REMOTE2_HIGH: + reg += 0x06; + break; + case LM83_REG_R_REMOTE1_HIGH: + case LM83_REG_R_REMOTE3_HIGH: + case LM83_REG_R_TCRIT: + reg += 0x18; + break; + default: + break; } - mutex_unlock(&data->update_lock); + return i2c_smbus_write_byte_data(client, reg, val); +} - return data; +static bool lm83_regmap_is_volatile(struct device *dev, unsigned int reg) +{ + switch (reg) { + case LM83_REG_R_LOCAL_TEMP: + case LM83_REG_R_REMOTE1_TEMP: + case LM83_REG_R_REMOTE2_TEMP: + case LM83_REG_R_REMOTE3_TEMP: + case LM83_REG_R_STATUS1: + case LM83_REG_R_STATUS2: + return true; + default: + return false; + } } +static const struct regmap_config lm83_regmap_config = { + .reg_bits = 8, + .val_bits = 8, + .cache_type = REGCACHE_RBTREE, + .volatile_reg = lm83_regmap_is_volatile, + 
.reg_read = lm83_regmap_reg_read, + .reg_write = lm83_regmap_reg_write, +}; + /* * Sysfs stuff */ @@ -153,8 +173,15 @@ static ssize_t temp_show(struct device *dev, struct device_attribute *devattr, char *buf) { struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); - struct lm83_data *data = lm83_update_device(dev); - return sprintf(buf, "%d\n", TEMP_FROM_REG(data->temp[attr->index])); + struct lm83_data *data = dev_get_drvdata(dev); + unsigned int regval; + int ret; + + ret = regmap_read(data->regmap, LM83_REG_TEMP[attr->index], &regval); + if (ret) + return ret; + + return sprintf(buf, "%d\n", TEMP_FROM_REG((s8)regval)); } static ssize_t temp_store(struct device *dev, @@ -163,38 +190,57 @@ static ssize_t temp_store(struct device *dev, { struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); struct lm83_data *data = dev_get_drvdata(dev); - struct i2c_client *client = data->client; + unsigned int regval; long val; - int nr = attr->index; int err; err = kstrtol(buf, 10, &val); if (err < 0) return err; - mutex_lock(&data->update_lock); - data->temp[nr] = TEMP_TO_REG(val); - i2c_smbus_write_byte_data(client, LM83_REG_W_HIGH[nr - 4], - data->temp[nr]); - mutex_unlock(&data->update_lock); - return count; + regval = TEMP_TO_REG(val); + err = regmap_write(data->regmap, LM83_REG_TEMP[attr->index], regval); + return err ?
: count; } static ssize_t alarms_show(struct device *dev, struct device_attribute *dummy, char *buf) { - struct lm83_data *data = lm83_update_device(dev); - return sprintf(buf, "%d\n", data->alarms); + struct lm83_data *data = dev_get_drvdata(dev); + unsigned int alarms, regval; + int err; + + err = regmap_read(data->regmap, LM83_REG_R_STATUS1, &regval); + if (err < 0) + return err; + alarms = regval; + err = regmap_read(data->regmap, LM83_REG_R_STATUS2, &regval); + if (err < 0) + return err; + alarms |= regval << 8; + + return sprintf(buf, "%u\n", alarms); } static ssize_t alarm_show(struct device *dev, struct device_attribute *devattr, char *buf) { struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); - struct lm83_data *data = lm83_update_device(dev); + struct lm83_data *data = dev_get_drvdata(dev); int bitnr = attr->index; - - return sprintf(buf, "%d\n", (data->alarms >> bitnr) & 1); + unsigned int alarm; + int reg, err; + + if (bitnr < 8) { + reg = LM83_REG_R_STATUS1; + } else { + reg = LM83_REG_R_STATUS2; + bitnr -= 8; + } + err = regmap_read(data->regmap, reg, &alarm); + if (err < 0) + return err; + return sprintf(buf, "%d\n", (alarm >> bitnr) & 1); } static SENSOR_DEVICE_ATTR_RO(temp1_input, temp, 0); @@ -326,16 +372,17 @@ MODULE_DEVICE_TABLE(i2c, lm83_id); static int lm83_probe(struct i2c_client *client) { + struct device *dev = &client->dev; struct device *hwmon_dev; struct lm83_data *data; - data = devm_kzalloc(&client->dev, sizeof(struct lm83_data), - GFP_KERNEL); + data = devm_kzalloc(dev, sizeof(struct lm83_data), GFP_KERNEL); if (!data) return -ENOMEM; - data->client = client; - mutex_init(&data->update_lock); + data->regmap = devm_regmap_init(dev, NULL, client, &lm83_regmap_config); + if (IS_ERR(data->regmap)) + return PTR_ERR(data->regmap); /* * Register sysfs hooks @@ -347,8 +394,7 @@ static int lm83_probe(struct i2c_client *client) if (i2c_match_id(lm83_id, client)->driver_data == lm83) data->groups[1] = &lm83_group_opt; - hwmon_dev =
devm_hwmon_device_register_with_groups(&client->dev, - client->name, + hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name, data, data->groups); return PTR_ERR_OR_ZERO(hwmon_dev); } -- GitLab From 362c5663e8760b145a67adb3a7704de401f6ceab Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 22 Dec 2021 19:03:04 -0800 Subject: [PATCH 0715/1586] hwmon: (lm83) Replace temperature conversion macros with standard functions Replace TEMP_FROM_REG with direct calculation and TEMP_TO_REG with standard functions/macros. Signed-off-by: Guenter Roeck --- drivers/hwmon/lm83.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/drivers/hwmon/lm83.c b/drivers/hwmon/lm83.c index c9605957e400e..434bd5b903d2a 100644 --- a/drivers/hwmon/lm83.c +++ b/drivers/hwmon/lm83.c @@ -66,17 +66,6 @@ enum chips { lm83, lm82 }; #define LM83_REG_R_TCRIT 0x42 #define LM83_REG_W_TCRIT 0x5A -/* - * Conversions and various macros - * The LM83 uses signed 8-bit values with LSB = 1 degree Celsius. - */ - -#define TEMP_FROM_REG(val) ((val) * 1000) -#define TEMP_TO_REG(val) ((val) <= -128000 ? -128 : \ - (val) >= 127000 ? 127 : \ - (val) < 0 ? ((val) - 500) / 1000 : \ - ((val) + 500) / 1000) - static const u8 LM83_REG_TEMP[] = { LM83_REG_R_LOCAL_TEMP, LM83_REG_R_REMOTE1_TEMP, @@ -181,7 +170,7 @@ static ssize_t temp_show(struct device *dev, struct device_attribute *devattr, if (ret) return ret; - return sprintf(buf, "%d\n", TEMP_FROM_REG((s8)regval)); + return sprintf(buf, "%d\n", (s8)regval * 1000); } static ssize_t temp_store(struct device *dev, @@ -198,7 +187,7 @@ static ssize_t temp_store(struct device *dev, if (err < 0) return err; - regval = TEMP_TO_REG(val); + regval = DIV_ROUND_CLOSEST(clamp_val(val, -128000, 127000), 1000); err = regmap_write(data->regmap, LM83_REG_TEMP[attr->index], regval); return err ? 
: count; } -- GitLab From 4d63c2d31a8f4c68458422fc5d0639a16237b426 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 22 Dec 2021 23:49:59 -0800 Subject: [PATCH 0716/1586] hwmon: (lm83) Demote log message if chip identification fails There should be no message in the kernel function if the detect function fails to identify a chip; this is perfectly normal and does not warrant a kernel log entry. Demote message to debug. Signed-off-by: Guenter Roeck --- drivers/hwmon/lm83.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/lm83.c b/drivers/hwmon/lm83.c index 434bd5b903d2a..82d7ef264f6f1 100644 --- a/drivers/hwmon/lm83.c +++ b/drivers/hwmon/lm83.c @@ -341,9 +341,9 @@ static int lm83_detect(struct i2c_client *client, break; default: /* identification failed */ - dev_info(&adapter->dev, - "Unsupported chip (man_id=0x%02X, chip_id=0x%02X)\n", - man_id, chip_id); + dev_dbg(&adapter->dev, + "Unsupported chip (man_id=0x%02X, chip_id=0x%02X)\n", + man_id, chip_id); return -ENODEV; } -- GitLab From 913ac02ade57493fae1fa030f5a33934ef80a254 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 24 Dec 2021 11:31:32 -0800 Subject: [PATCH 0717/1586] hwmon: (lm83) Explain why LM82 may be misdetected as LM83 According to the March 2013 revision of the LM82 datasheet, the latest LM82 die revision is 0x03. This was confirmed and observed with a real chip. Further details in this revision of the LM82 datasheet suggest that LM82 is now just a repackaged LM83. Such versions of LM82 will be detected as LM83. Add comment to the code explaining why this may happen. 
Signed-off-by: Guenter Roeck --- drivers/hwmon/lm83.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/hwmon/lm83.c b/drivers/hwmon/lm83.c index 82d7ef264f6f1..d9ee01ca8aed3 100644 --- a/drivers/hwmon/lm83.c +++ b/drivers/hwmon/lm83.c @@ -334,6 +334,14 @@ static int lm83_detect(struct i2c_client *client, chip_id = i2c_smbus_read_byte_data(client, LM83_REG_R_CHIP_ID); switch (chip_id) { case 0x03: + /* + * According to the LM82 datasheet dated March 2013, recent + * revisions of LM82 have a die revision of 0x03. This was + * confirmed with a real chip. Further details in this revision + * of the LM82 datasheet strongly suggest that LM82 is just a + * repackaged LM83. It is therefore impossible to distinguish + * those chips from LM83, and they will be misdetected as LM83. + */ name = "lm83"; break; case 0x01: -- GitLab From c291f612a813a1f295266a3de5cb418e48f41440 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 22 Dec 2021 20:24:02 -0800 Subject: [PATCH 0718/1586] hwmon: (lm83) Convert to use with_info API Use with_info API to reduce code size and simplify the code. 
Signed-off-by: Guenter Roeck --- drivers/hwmon/lm83.c | 315 +++++++++++++++++++++++++------------------ 1 file changed, 183 insertions(+), 132 deletions(-) diff --git a/drivers/hwmon/lm83.c b/drivers/hwmon/lm83.c index d9ee01ca8aed3..12370dcefa6ab 100644 --- a/drivers/hwmon/lm83.c +++ b/drivers/hwmon/lm83.c @@ -18,11 +18,11 @@ * http://www.national.com/pf/LM/LM82.html */ +#include #include #include #include #include -#include #include #include #include @@ -71,11 +71,30 @@ static const u8 LM83_REG_TEMP[] = { LM83_REG_R_REMOTE1_TEMP, LM83_REG_R_REMOTE2_TEMP, LM83_REG_R_REMOTE3_TEMP, +}; + +static const u8 LM83_REG_MAX[] = { LM83_REG_R_LOCAL_HIGH, LM83_REG_R_REMOTE1_HIGH, LM83_REG_R_REMOTE2_HIGH, LM83_REG_R_REMOTE3_HIGH, - LM83_REG_R_TCRIT, +}; + +/* alarm and fault registers and bits, indexed by channel */ +static const u8 LM83_ALARM_REG[] = { + LM83_REG_R_STATUS1, LM83_REG_R_STATUS2, LM83_REG_R_STATUS1, LM83_REG_R_STATUS2 +}; + +static const u8 LM83_MAX_ALARM_BIT[] = { + BIT(6), BIT(7), BIT(4), BIT(4) +}; + +static const u8 LM83_CRIT_ALARM_BIT[] = { + BIT(0), BIT(0), BIT(1), BIT(1) +}; + +static const u8 LM83_FAULT_BIT[] = { + 0, BIT(5), BIT(2), BIT(2) }; /* @@ -84,7 +103,7 @@ static const u8 LM83_REG_TEMP[] = { struct lm83_data { struct regmap *regmap; - const struct attribute_group *groups[3]; + enum chips type; }; /* regmap code */ @@ -154,157 +173,197 @@ static const struct regmap_config lm83_regmap_config = { .reg_write = lm83_regmap_reg_write, }; -/* - * Sysfs stuff - */ +/* hwmon API */ -static ssize_t temp_show(struct device *dev, struct device_attribute *devattr, - char *buf) +static int lm83_temp_read(struct device *dev, u32 attr, int channel, long *val) { - struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); struct lm83_data *data = dev_get_drvdata(dev); unsigned int regval; - int ret; - - ret = regmap_read(data->regmap, LM83_REG_TEMP[attr->index], &regval); - if (ret) - return ret; + int err; - return sprintf(buf, "%d\n", (s8)regval * 1000); 
+ switch (attr) { + case hwmon_temp_input: + err = regmap_read(data->regmap, LM83_REG_TEMP[channel], &regval); + if (err < 0) + return err; + *val = (s8)regval * 1000; + break; + case hwmon_temp_max: + err = regmap_read(data->regmap, LM83_REG_MAX[channel], &regval); + if (err < 0) + return err; + *val = (s8)regval * 1000; + break; + case hwmon_temp_crit: + err = regmap_read(data->regmap, LM83_REG_R_TCRIT, &regval); + if (err < 0) + return err; + *val = (s8)regval * 1000; + break; + case hwmon_temp_max_alarm: + err = regmap_read(data->regmap, LM83_ALARM_REG[channel], &regval); + if (err < 0) + return err; + *val = !!(regval & LM83_MAX_ALARM_BIT[channel]); + break; + case hwmon_temp_crit_alarm: + err = regmap_read(data->regmap, LM83_ALARM_REG[channel], &regval); + if (err < 0) + return err; + *val = !!(regval & LM83_CRIT_ALARM_BIT[channel]); + break; + case hwmon_temp_fault: + err = regmap_read(data->regmap, LM83_ALARM_REG[channel], &regval); + if (err < 0) + return err; + *val = !!(regval & LM83_FAULT_BIT[channel]); + break; + default: + return -EOPNOTSUPP; + } + return 0; } -static ssize_t temp_store(struct device *dev, - struct device_attribute *devattr, const char *buf, - size_t count) +static int lm83_temp_write(struct device *dev, u32 attr, int channel, long val) { - struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); struct lm83_data *data = dev_get_drvdata(dev); unsigned int regval; - long val; int err; - err = kstrtol(buf, 10, &val); - if (err < 0) - return err; - regval = DIV_ROUND_CLOSEST(clamp_val(val, -128000, 127000), 1000); - err = regmap_write(data->regmap, LM83_REG_TEMP[attr->index], regval); - return err ? 
: count; + + switch (attr) { + case hwmon_temp_max: + err = regmap_write(data->regmap, LM83_REG_MAX[channel], regval); + if (err < 0) + return err; + break; + case hwmon_temp_crit: + err = regmap_write(data->regmap, LM83_REG_R_TCRIT, regval); + if (err < 0) + return err; + break; + default: + return -EOPNOTSUPP; + } + return 0; } -static ssize_t alarms_show(struct device *dev, struct device_attribute *dummy, - char *buf) +static int lm83_chip_read(struct device *dev, u32 attr, int channel, long *val) { struct lm83_data *data = dev_get_drvdata(dev); - unsigned int alarms, regval; + unsigned int regval; int err; - err = regmap_read(data->regmap, LM83_REG_R_STATUS1, &regval); - if (err < 0) - return err; - alarms = regval; - err = regmap_read(data->regmap, LM83_REG_R_STATUS2, &regval); - if (err < 0) - return err; - alarms |= regval << 8; + switch (attr) { + case hwmon_chip_alarms: + err = regmap_read(data->regmap, LM83_REG_R_STATUS1, &regval); + if (err < 0) + return err; + *val = regval; + err = regmap_read(data->regmap, LM83_REG_R_STATUS2, &regval); + if (err < 0) + return err; + *val |= regval << 8; + return 0; + default: + return -EOPNOTSUPP; + } - return sprintf(buf, "%u\n", alarms); + return 0; } -static ssize_t alarm_show(struct device *dev, - struct device_attribute *devattr, char *buf) +static int lm83_read(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long *val) { - struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); - struct lm83_data *data = dev_get_drvdata(dev); - int bitnr = attr->index; - unsigned int alarm; - int reg, err; - - if (bitnr < 8) { - reg = LM83_REG_R_STATUS1; - } else { - reg = LM83_REG_R_STATUS2; - bitnr -= 8; + switch (type) { + case hwmon_chip: + return lm83_chip_read(dev, attr, channel, val); + case hwmon_temp: + return lm83_temp_read(dev, attr, channel, val); + default: + return -EOPNOTSUPP; } - err = regmap_read(data->regmap, reg, &alarm); - if (err < 0) - return err; - return sprintf(buf, "%d\n", 
(alarm >> bitnr) & 1); } -static SENSOR_DEVICE_ATTR_RO(temp1_input, temp, 0); -static SENSOR_DEVICE_ATTR_RO(temp2_input, temp, 1); -static SENSOR_DEVICE_ATTR_RO(temp3_input, temp, 2); -static SENSOR_DEVICE_ATTR_RO(temp4_input, temp, 3); -static SENSOR_DEVICE_ATTR_RW(temp1_max, temp, 4); -static SENSOR_DEVICE_ATTR_RW(temp2_max, temp, 5); -static SENSOR_DEVICE_ATTR_RW(temp3_max, temp, 6); -static SENSOR_DEVICE_ATTR_RW(temp4_max, temp, 7); -static SENSOR_DEVICE_ATTR_RO(temp1_crit, temp, 8); -static SENSOR_DEVICE_ATTR_RO(temp2_crit, temp, 8); -static SENSOR_DEVICE_ATTR_RW(temp3_crit, temp, 8); -static SENSOR_DEVICE_ATTR_RO(temp4_crit, temp, 8); - -/* Individual alarm files */ -static SENSOR_DEVICE_ATTR_RO(temp1_crit_alarm, alarm, 0); -static SENSOR_DEVICE_ATTR_RO(temp3_crit_alarm, alarm, 1); -static SENSOR_DEVICE_ATTR_RO(temp3_fault, alarm, 2); -static SENSOR_DEVICE_ATTR_RO(temp3_max_alarm, alarm, 4); -static SENSOR_DEVICE_ATTR_RO(temp1_max_alarm, alarm, 6); -static SENSOR_DEVICE_ATTR_RO(temp2_crit_alarm, alarm, 8); -static SENSOR_DEVICE_ATTR_RO(temp4_crit_alarm, alarm, 9); -static SENSOR_DEVICE_ATTR_RO(temp4_fault, alarm, 10); -static SENSOR_DEVICE_ATTR_RO(temp4_max_alarm, alarm, 12); -static SENSOR_DEVICE_ATTR_RO(temp2_fault, alarm, 13); -static SENSOR_DEVICE_ATTR_RO(temp2_max_alarm, alarm, 15); -/* Raw alarm file for compatibility */ -static DEVICE_ATTR_RO(alarms); - -static struct attribute *lm83_attributes[] = { - &sensor_dev_attr_temp1_input.dev_attr.attr, - &sensor_dev_attr_temp3_input.dev_attr.attr, - &sensor_dev_attr_temp1_max.dev_attr.attr, - &sensor_dev_attr_temp3_max.dev_attr.attr, - &sensor_dev_attr_temp1_crit.dev_attr.attr, - &sensor_dev_attr_temp3_crit.dev_attr.attr, - - &sensor_dev_attr_temp1_crit_alarm.dev_attr.attr, - &sensor_dev_attr_temp3_crit_alarm.dev_attr.attr, - &sensor_dev_attr_temp3_fault.dev_attr.attr, - &sensor_dev_attr_temp3_max_alarm.dev_attr.attr, - &sensor_dev_attr_temp1_max_alarm.dev_attr.attr, - &dev_attr_alarms.attr, - NULL -}; 
+static int lm83_write(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long val) +{ + switch (type) { + case hwmon_temp: + return lm83_temp_write(dev, attr, channel, val); + default: + return -EOPNOTSUPP; + } +} -static const struct attribute_group lm83_group = { - .attrs = lm83_attributes, -}; +static umode_t lm83_is_visible(const void *_data, enum hwmon_sensor_types type, + u32 attr, int channel) +{ + const struct lm83_data *data = _data; -static struct attribute *lm83_attributes_opt[] = { - &sensor_dev_attr_temp2_input.dev_attr.attr, - &sensor_dev_attr_temp4_input.dev_attr.attr, - &sensor_dev_attr_temp2_max.dev_attr.attr, - &sensor_dev_attr_temp4_max.dev_attr.attr, - &sensor_dev_attr_temp2_crit.dev_attr.attr, - &sensor_dev_attr_temp4_crit.dev_attr.attr, - - &sensor_dev_attr_temp2_crit_alarm.dev_attr.attr, - &sensor_dev_attr_temp4_crit_alarm.dev_attr.attr, - &sensor_dev_attr_temp4_fault.dev_attr.attr, - &sensor_dev_attr_temp4_max_alarm.dev_attr.attr, - &sensor_dev_attr_temp2_fault.dev_attr.attr, - &sensor_dev_attr_temp2_max_alarm.dev_attr.attr, + /* + * LM82 only supports a single external channel, modeled as channel 2. 
+ */ + if (data->type == lm82 && (channel == 1 || channel == 3)) + return 0; + + switch (type) { + case hwmon_chip: + if (attr == hwmon_chip_alarms) + return 0444; + break; + case hwmon_temp: + switch (attr) { + case hwmon_temp_input: + case hwmon_temp_max_alarm: + case hwmon_temp_crit_alarm: + return 0444; + case hwmon_temp_fault: + if (channel) + return 0444; + break; + case hwmon_temp_max: + return 0644; + case hwmon_temp_crit: + if (channel == 2) + return 0644; + return 0444; + default: + break; + } + break; + default: + break; + } + return 0; +} + +static const struct hwmon_channel_info *lm83_info[] = { + HWMON_CHANNEL_INFO(chip, HWMON_C_ALARMS), + HWMON_CHANNEL_INFO(temp, + HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_CRIT | + HWMON_T_MAX_ALARM | HWMON_T_CRIT_ALARM, + HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_CRIT | + HWMON_T_MAX_ALARM | HWMON_T_CRIT_ALARM | HWMON_T_FAULT, + HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_CRIT | + HWMON_T_MAX_ALARM | HWMON_T_CRIT_ALARM | HWMON_T_FAULT, + HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_CRIT | + HWMON_T_MAX_ALARM | HWMON_T_CRIT_ALARM | HWMON_T_FAULT + ), NULL }; -static const struct attribute_group lm83_group_opt = { - .attrs = lm83_attributes_opt, +static const struct hwmon_ops lm83_hwmon_ops = { + .is_visible = lm83_is_visible, + .read = lm83_read, + .write = lm83_write, }; -/* - * Real code - */ +static const struct hwmon_chip_info lm83_chip_info = { + .ops = &lm83_hwmon_ops, + .info = lm83_info, +}; /* Return 0 if detection is successful, -ENODEV otherwise */ static int lm83_detect(struct i2c_client *client, @@ -381,18 +440,10 @@ static int lm83_probe(struct i2c_client *client) if (IS_ERR(data->regmap)) return PTR_ERR(data->regmap); - /* - * Register sysfs hooks - * The LM82 can only monitor one external diode which is - * at the same register as the LM83 temp3 entry - so we - * declare 1 and 3 common, and then 2 and 4 only for the LM83. 
- */ - data->groups[0] = &lm83_group; - if (i2c_match_id(lm83_id, client)->driver_data == lm83) - data->groups[1] = &lm83_group_opt; + data->type = i2c_match_id(lm83_id, client)->driver_data; - hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name, - data, data->groups); + hwmon_dev = devm_hwmon_device_register_with_info(dev, client->name, + data, &lm83_chip_info, NULL); return PTR_ERR_OR_ZERO(hwmon_dev); } -- GitLab From b68437ace4b8636c5c8686c0334cbe5d029557bc Mon Sep 17 00:00:00 2001 From: Zev Weiss Date: Thu, 13 Jan 2022 08:46:29 -0800 Subject: [PATCH 0719/1586] hwmon: (nct6775) add support for TSI temperature registers These registers report CPU temperatures (and, depending on the system, sometimes chipset temperatures) via the TSI interface on AMD systems. They're distinct from most of the other Super-IO temperature readings (CPUTIN, SYSTIN, etc.) in that they're not a selectable source for monitoring and are in a different (higher resolution) format, but can still provide useful temperature data. 
Signed-off-by: Zev Weiss Tested-by: Renze Nicolai Link: https://lore.kernel.org/r/20220113164629.21924-1-zev@bewilderbeest.net Tested-by: Oleksandr Natalenko Signed-off-by: Guenter Roeck --- drivers/hwmon/nct6775.c | 129 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 123 insertions(+), 6 deletions(-) diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index 098d12b9ecdad..63e15b5f93a0e 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -308,6 +308,7 @@ static void superio_exit(struct nct6775_sio_data *sio_data) #define NUM_TEMP 10 /* Max number of temp attribute sets w/ limits*/ #define NUM_TEMP_FIXED 6 /* Max number of fixed temp attribute sets */ +#define NUM_TSI_TEMP 8 /* Max number of TSI temp register pairs */ #define NUM_REG_ALARM 7 /* Max number of alarm registers */ #define NUM_REG_BEEP 5 /* Max number of beep registers */ @@ -498,6 +499,8 @@ static const u16 NCT6775_REG_TEMP_CRIT[32] = { [11] = 0xa07 }; +static const u16 NCT6775_REG_TSI_TEMP[] = { 0x669 }; + /* NCT6776 specific data */ /* STEP_UP_TIME and STEP_DOWN_TIME regs are swapped for all chips but NCT6775 */ @@ -581,6 +584,9 @@ static const u16 NCT6776_REG_TEMP_CRIT[32] = { [12] = 0x70a, }; +static const u16 NCT6776_REG_TSI_TEMP[] = { + 0x409, 0x40b, 0x40d, 0x40f, 0x411, 0x413, 0x415, 0x417 }; + /* NCT6779 specific data */ static const u16 NCT6779_REG_IN[] = { @@ -864,6 +870,8 @@ static const char *const nct6796_temp_label[] = { #define NCT6796_TEMP_MASK 0xbfff0ffe #define NCT6796_VIRT_TEMP_MASK 0x80000c00 +static const u16 NCT6796_REG_TSI_TEMP[] = { 0x409, 0x40b }; + static const char *const nct6798_temp_label[] = { "", "SYSTIN", @@ -1005,6 +1013,8 @@ static const u16 NCT6106_REG_TEMP_CRIT[32] = { [12] = 0x205, }; +static const u16 NCT6106_REG_TSI_TEMP[] = { 0x59, 0x5b, 0x5d, 0x5f, 0x61, 0x63, 0x65, 0x67 }; + /* NCT6112D/NCT6114D/NCT6116D specific data */ static const u16 NCT6116_REG_FAN[] = { 0x20, 0x22, 0x24, 0x26, 0x28 }; @@ -1069,6 +1079,8 @@ static const 
s8 NCT6116_BEEP_BITS[] = { 34, -1 /* intrusion0, intrusion1 */ }; +static const u16 NCT6116_REG_TSI_TEMP[] = { 0x59, 0x5b }; + static enum pwm_enable reg_to_pwm_enable(int pwm, int mode) { if (mode == 0 && pwm == 255) @@ -1169,6 +1181,12 @@ static inline u8 in_to_reg(u32 val, u8 nr) return clamp_val(DIV_ROUND_CLOSEST(val * 100, scale_in[nr]), 0, 255); } +/* TSI temperatures are in 8.3 format */ +static inline unsigned int tsi_temp_from_reg(unsigned int reg) +{ + return (reg >> 5) * 125; +} + /* * Data structures and manipulation thereof */ @@ -1179,7 +1197,7 @@ struct nct6775_data { enum kinds kind; const char *name; - const struct attribute_group *groups[6]; + const struct attribute_group *groups[7]; u16 reg_temp[5][NUM_TEMP]; /* 0=temp, 1=temp_over, 2=temp_hyst, * 3=temp_crit, 4=temp_lcrit @@ -1240,6 +1258,8 @@ struct nct6775_data { const u16 *REG_ALARM; const u16 *REG_BEEP; + const u16 *REG_TSI_TEMP; + unsigned int (*fan_from_reg)(u16 reg, unsigned int divreg); unsigned int (*fan_from_reg_min)(u16 reg, unsigned int divreg); @@ -1267,6 +1287,7 @@ struct nct6775_data { s8 temp_offset[NUM_TEMP_FIXED]; s16 temp[5][NUM_TEMP]; /* 0=temp, 1=temp_over, 2=temp_hyst, * 3=temp_crit, 4=temp_lcrit */ + s16 tsi_temp[NUM_TSI_TEMP]; u64 alarms; u64 beeps; @@ -1315,6 +1336,7 @@ struct nct6775_data { u16 have_temp; u16 have_temp_fixed; + u16 have_tsi_temp; u16 have_in; /* Remember extra register values over suspend/resume */ @@ -1464,13 +1486,15 @@ static bool is_word_sized(struct nct6775_data *data, u16 reg) switch (data->kind) { case nct6106: return reg == 0x20 || reg == 0x22 || reg == 0x24 || + (reg >= 0x59 && reg < 0x69 && (reg & 1)) || reg == 0xe0 || reg == 0xe2 || reg == 0xe4 || reg == 0x111 || reg == 0x121 || reg == 0x131; case nct6116: return reg == 0x20 || reg == 0x22 || reg == 0x24 || - reg == 0x26 || reg == 0x28 || reg == 0xe0 || reg == 0xe2 || - reg == 0xe4 || reg == 0xe6 || reg == 0xe8 || reg == 0x111 || - reg == 0x121 || reg == 0x131 || reg == 0x191 || reg == 0x1a1; 
+ reg == 0x26 || reg == 0x28 || reg == 0x59 || reg == 0x5b || + reg == 0xe0 || reg == 0xe2 || reg == 0xe4 || reg == 0xe6 || + reg == 0xe8 || reg == 0x111 || reg == 0x121 || reg == 0x131 || + reg == 0x191 || reg == 0x1a1; case nct6775: return (((reg & 0xff00) == 0x100 || (reg & 0xff00) == 0x200) && @@ -1479,7 +1503,7 @@ static bool is_word_sized(struct nct6775_data *data, u16 reg) (reg & 0x00ff) == 0x55)) || (reg & 0xfff0) == 0x630 || reg == 0x640 || reg == 0x642 || - reg == 0x662 || + reg == 0x662 || reg == 0x669 || ((reg & 0xfff0) == 0x650 && (reg & 0x000f) >= 0x06) || reg == 0x73 || reg == 0x75 || reg == 0x77; case nct6776: @@ -1490,6 +1514,7 @@ static bool is_word_sized(struct nct6775_data *data, u16 reg) (reg & 0x00ff) == 0x55)) || (reg & 0xfff0) == 0x630 || reg == 0x402 || + (reg >= 0x409 && reg < 0x419 && (reg & 1)) || reg == 0x640 || reg == 0x642 || ((reg & 0xfff0) == 0x650 && (reg & 0x000f) >= 0x06) || reg == 0x73 || reg == 0x75 || reg == 0x77; @@ -1504,6 +1529,7 @@ static bool is_word_sized(struct nct6775_data *data, u16 reg) return reg == 0x150 || reg == 0x153 || reg == 0x155 || (reg & 0xfff0) == 0x4c0 || reg == 0x402 || + (reg >= 0x409 && reg < 0x419 && (reg & 1)) || reg == 0x63a || reg == 0x63c || reg == 0x63e || reg == 0x640 || reg == 0x642 || reg == 0x64a || reg == 0x64c || @@ -1987,6 +2013,12 @@ static struct nct6775_data *nct6775_update_device(struct device *dev) data->REG_TEMP_OFFSET[i]); } + for (i = 0; i < NUM_TSI_TEMP; i++) { + if (!(data->have_tsi_temp & BIT(i))) + continue; + data->tsi_temp[i] = data->read_value(data, data->REG_TSI_TEMP[i]); + } + data->alarms = 0; for (i = 0; i < NUM_REG_ALARM; i++) { u8 alarm; @@ -2670,6 +2702,44 @@ static const struct sensor_template_group nct6775_temp_template_group = { .base = 1, }; +static ssize_t show_tsi_temp(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct nct6775_data *data = nct6775_update_device(dev); + struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr); + + 
return sysfs_emit(buf, "%u\n", tsi_temp_from_reg(data->tsi_temp[sattr->index])); +} + +static ssize_t show_tsi_temp_label(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr); + + return sysfs_emit(buf, "TSI%d_TEMP\n", sattr->index); +} + +SENSOR_TEMPLATE(tsi_temp_input, "temp%d_input", 0444, show_tsi_temp, NULL, 0); +SENSOR_TEMPLATE(tsi_temp_label, "temp%d_label", 0444, show_tsi_temp_label, NULL, 0); + +static umode_t nct6775_tsi_temp_is_visible(struct kobject *kobj, struct attribute *attr, + int index) +{ + struct device *dev = kobj_to_dev(kobj); + struct nct6775_data *data = dev_get_drvdata(dev); + int temp = index / 2; + + return (data->have_tsi_temp & BIT(temp)) ? attr->mode : 0; +} + +/* + * The index calculation in nct6775_tsi_temp_is_visible() must be kept in + * sync with the size of this array. + */ +static struct sensor_device_template *nct6775_tsi_temp_template[] = { + &sensor_dev_template_tsi_temp_input, + &sensor_dev_template_tsi_temp_label, + NULL +}; + static ssize_t show_pwm_mode(struct device *dev, struct device_attribute *attr, char *buf) { @@ -3948,10 +4018,11 @@ static int nct6775_probe(struct platform_device *pdev) const u16 *reg_temp, *reg_temp_over, *reg_temp_hyst, *reg_temp_config; const u16 *reg_temp_mon, *reg_temp_alternate, *reg_temp_crit; const u16 *reg_temp_crit_l = NULL, *reg_temp_crit_h = NULL; - int num_reg_temp, num_reg_temp_mon; + int num_reg_temp, num_reg_temp_mon, num_reg_tsi_temp; u8 cr2a; struct attribute_group *group; struct device *hwmon_dev; + struct sensor_template_group tsi_temp_tg; int num_attr_groups = 0; if (sio_data->access == access_direct) { @@ -4043,11 +4114,13 @@ static int nct6775_probe(struct platform_device *pdev) data->ALARM_BITS = NCT6106_ALARM_BITS; data->REG_BEEP = NCT6106_REG_BEEP; data->BEEP_BITS = NCT6106_BEEP_BITS; + data->REG_TSI_TEMP = NCT6106_REG_TSI_TEMP; reg_temp = NCT6106_REG_TEMP; reg_temp_mon = NCT6106_REG_TEMP_MON; 
num_reg_temp = ARRAY_SIZE(NCT6106_REG_TEMP); num_reg_temp_mon = ARRAY_SIZE(NCT6106_REG_TEMP_MON); + num_reg_tsi_temp = ARRAY_SIZE(NCT6106_REG_TSI_TEMP); reg_temp_over = NCT6106_REG_TEMP_OVER; reg_temp_hyst = NCT6106_REG_TEMP_HYST; reg_temp_config = NCT6106_REG_TEMP_CONFIG; @@ -4116,11 +4189,13 @@ static int nct6775_probe(struct platform_device *pdev) data->ALARM_BITS = NCT6116_ALARM_BITS; data->REG_BEEP = NCT6106_REG_BEEP; data->BEEP_BITS = NCT6116_BEEP_BITS; + data->REG_TSI_TEMP = NCT6116_REG_TSI_TEMP; reg_temp = NCT6106_REG_TEMP; reg_temp_mon = NCT6106_REG_TEMP_MON; num_reg_temp = ARRAY_SIZE(NCT6106_REG_TEMP); num_reg_temp_mon = ARRAY_SIZE(NCT6106_REG_TEMP_MON); + num_reg_tsi_temp = ARRAY_SIZE(NCT6116_REG_TSI_TEMP); reg_temp_over = NCT6106_REG_TEMP_OVER; reg_temp_hyst = NCT6106_REG_TEMP_HYST; reg_temp_config = NCT6106_REG_TEMP_CONFIG; @@ -4191,11 +4266,13 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_WEIGHT_TEMP[2] = NCT6775_REG_WEIGHT_TEMP_BASE; data->REG_ALARM = NCT6775_REG_ALARM; data->REG_BEEP = NCT6775_REG_BEEP; + data->REG_TSI_TEMP = NCT6775_REG_TSI_TEMP; reg_temp = NCT6775_REG_TEMP; reg_temp_mon = NCT6775_REG_TEMP_MON; num_reg_temp = ARRAY_SIZE(NCT6775_REG_TEMP); num_reg_temp_mon = ARRAY_SIZE(NCT6775_REG_TEMP_MON); + num_reg_tsi_temp = ARRAY_SIZE(NCT6775_REG_TSI_TEMP); reg_temp_over = NCT6775_REG_TEMP_OVER; reg_temp_hyst = NCT6775_REG_TEMP_HYST; reg_temp_config = NCT6775_REG_TEMP_CONFIG; @@ -4264,11 +4341,13 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_WEIGHT_TEMP[2] = NCT6775_REG_WEIGHT_TEMP_BASE; data->REG_ALARM = NCT6775_REG_ALARM; data->REG_BEEP = NCT6776_REG_BEEP; + data->REG_TSI_TEMP = NCT6776_REG_TSI_TEMP; reg_temp = NCT6775_REG_TEMP; reg_temp_mon = NCT6775_REG_TEMP_MON; num_reg_temp = ARRAY_SIZE(NCT6775_REG_TEMP); num_reg_temp_mon = ARRAY_SIZE(NCT6775_REG_TEMP_MON); + num_reg_tsi_temp = ARRAY_SIZE(NCT6776_REG_TSI_TEMP); reg_temp_over = NCT6775_REG_TEMP_OVER; reg_temp_hyst = NCT6775_REG_TEMP_HYST; 
reg_temp_config = NCT6776_REG_TEMP_CONFIG; @@ -4341,11 +4420,13 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_WEIGHT_TEMP[2] = NCT6775_REG_WEIGHT_TEMP_BASE; data->REG_ALARM = NCT6779_REG_ALARM; data->REG_BEEP = NCT6776_REG_BEEP; + data->REG_TSI_TEMP = NCT6776_REG_TSI_TEMP; reg_temp = NCT6779_REG_TEMP; reg_temp_mon = NCT6779_REG_TEMP_MON; num_reg_temp = ARRAY_SIZE(NCT6779_REG_TEMP); num_reg_temp_mon = ARRAY_SIZE(NCT6779_REG_TEMP_MON); + num_reg_tsi_temp = ARRAY_SIZE(NCT6776_REG_TSI_TEMP); reg_temp_over = NCT6779_REG_TEMP_OVER; reg_temp_hyst = NCT6779_REG_TEMP_HYST; reg_temp_config = NCT6779_REG_TEMP_CONFIG; @@ -4460,6 +4541,24 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_BEEP = NCT6776_REG_BEEP; else data->REG_BEEP = NCT6792_REG_BEEP; + switch (data->kind) { + case nct6791: + case nct6792: + case nct6793: + data->REG_TSI_TEMP = NCT6776_REG_TSI_TEMP; + num_reg_tsi_temp = ARRAY_SIZE(NCT6776_REG_TSI_TEMP); + break; + case nct6795: + case nct6796: + case nct6797: + case nct6798: + data->REG_TSI_TEMP = NCT6796_REG_TSI_TEMP; + num_reg_tsi_temp = ARRAY_SIZE(NCT6796_REG_TSI_TEMP); + break; + default: + num_reg_tsi_temp = 0; + break; + } reg_temp = NCT6779_REG_TEMP; num_reg_temp = ARRAY_SIZE(NCT6779_REG_TEMP); @@ -4659,6 +4758,12 @@ static int nct6775_probe(struct platform_device *pdev) } #endif /* USE_ALTERNATE */ + /* Check which TSIx_TEMP registers are active */ + for (i = 0; i < num_reg_tsi_temp; i++) { + if (data->read_value(data, data->REG_TSI_TEMP[i])) + data->have_tsi_temp |= BIT(i); + } + /* Initialize the chip */ nct6775_init_device(data); @@ -4766,6 +4871,18 @@ static int nct6775_probe(struct platform_device *pdev) return PTR_ERR(group); data->groups[num_attr_groups++] = group; + + if (data->have_tsi_temp) { + tsi_temp_tg.templates = nct6775_tsi_temp_template; + tsi_temp_tg.is_visible = nct6775_tsi_temp_is_visible; + tsi_temp_tg.base = fls(data->have_temp) + 1; + group = nct6775_create_attr_group(dev, &tsi_temp_tg, 
fls(data->have_tsi_temp)); + if (IS_ERR(group)) + return PTR_ERR(group); + + data->groups[num_attr_groups++] = group; + } + data->groups[num_attr_groups++] = &nct6775_group_other; hwmon_dev = devm_hwmon_device_register_with_groups(dev, data->name, -- GitLab From 32b9a19a1966fb3124d7ddd60ebd08688be2f3fa Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Mon, 10 Jan 2022 18:22:55 +0000 Subject: [PATCH 0720/1586] ABI: hwmon: Document "label" sysfs attribute Add the "label" sysfs attribute, which can contain a descriptive label that allows to uniquely identify a device within the system. Signed-off-by: Paul Cercueil Link: https://lore.kernel.org/r/20220110182256.30763-2-paul@crapouillou.net Signed-off-by: Guenter Roeck --- Documentation/ABI/testing/sysfs-class-hwmon | 8 ++++++++ Documentation/hwmon/sysfs-interface.rst | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-class-hwmon b/Documentation/ABI/testing/sysfs-class-hwmon index 1f20687def44c..653d4c75eddb3 100644 --- a/Documentation/ABI/testing/sysfs-class-hwmon +++ b/Documentation/ABI/testing/sysfs-class-hwmon @@ -9,6 +9,14 @@ Description: RO +What: /sys/class/hwmon/hwmonX/label +Description: + A descriptive label that allows to uniquely identify a + device within the system. + The contents of the label are free-form. + + RO + What: /sys/class/hwmon/hwmonX/update_interval Description: The interval at which the chip will update readings. diff --git a/Documentation/hwmon/sysfs-interface.rst b/Documentation/hwmon/sysfs-interface.rst index 85652a6aaa3e7..209626fb24052 100644 --- a/Documentation/hwmon/sysfs-interface.rst +++ b/Documentation/hwmon/sysfs-interface.rst @@ -99,6 +99,10 @@ Global attributes `name` The chip name. +`label` + A descriptive label that allows to uniquely identify a device + within the system. + `update_interval` The interval at which the chip will update readings. 
-- GitLab From e1c9d6d61ddf3f34f14d3de51d6eea68683b5841 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Mon, 10 Jan 2022 18:22:56 +0000 Subject: [PATCH 0721/1586] hwmon: Add "label" attribute If a label is defined in the device tree for this device add that to the device specific attributes. This is useful for userspace to be able to identify an individual device when multiple identical chips are present in the system. Signed-off-by: Paul Cercueil Tested-by: Cosmin Tanislav Link: https://lore.kernel.org/r/20220110182256.30763-3-paul@crapouillou.net Signed-off-by: Guenter Roeck --- drivers/hwmon/hwmon.c | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c index 0b79c4e1bf99f..6c3a8c65390d8 100644 --- a/drivers/hwmon/hwmon.c +++ b/drivers/hwmon/hwmon.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -30,6 +31,7 @@ struct hwmon_device { const char *name; + const char *label; struct device dev; const struct hwmon_chip_info *chip; struct list_head tzdata; @@ -71,17 +73,29 @@ name_show(struct device *dev, struct device_attribute *attr, char *buf) } static DEVICE_ATTR_RO(name); +static ssize_t +label_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%s\n", to_hwmon_device(dev)->label); +} +static DEVICE_ATTR_RO(label); + static struct attribute *hwmon_dev_attrs[] = { &dev_attr_name.attr, + &dev_attr_label.attr, NULL }; -static umode_t hwmon_dev_name_is_visible(struct kobject *kobj, +static umode_t hwmon_dev_attr_is_visible(struct kobject *kobj, struct attribute *attr, int n) { struct device *dev = kobj_to_dev(kobj); + struct hwmon_device *hdev = to_hwmon_device(dev); - if (to_hwmon_device(dev)->name == NULL) + if (attr == &dev_attr_name.attr && hdev->name == NULL) + return 0; + + if (attr == &dev_attr_label.attr && hdev->label == NULL) return 0; return attr->mode; @@ -89,7 +103,7 @@ static 
umode_t hwmon_dev_name_is_visible(struct kobject *kobj, static const struct attribute_group hwmon_dev_attr_group = { .attrs = hwmon_dev_attrs, - .is_visible = hwmon_dev_name_is_visible, + .is_visible = hwmon_dev_attr_is_visible, }; static const struct attribute_group *hwmon_dev_attr_groups[] = { @@ -117,6 +131,7 @@ static void hwmon_dev_release(struct device *dev) if (hwdev->group.attrs) hwmon_free_attrs(hwdev->group.attrs); kfree(hwdev->groups); + kfree(hwdev->label); kfree(hwdev); } @@ -738,6 +753,7 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata, const struct attribute_group **groups) { struct hwmon_device *hwdev; + const char *label; struct device *hdev; int i, err, id; @@ -793,6 +809,18 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata, hdev->groups = groups; } + if (device_property_present(dev, "label")) { + err = device_property_read_string(dev, "label", &label); + if (err < 0) + goto free_hwmon; + + hwdev->label = kstrdup(label, GFP_KERNEL); + if (hwdev->label == NULL) { + err = -ENOMEM; + goto free_hwmon; + } + } + hwdev->name = name; hdev->class = &hwmon_class; hdev->parent = dev; -- GitLab From 23a8d76e5ec5fea267d5c822477b8b11681c2b45 Mon Sep 17 00:00:00 2001 From: Denis Pauk Date: Wed, 12 Jan 2022 23:50:13 +0200 Subject: [PATCH 0722/1586] hwmon: (nct6775) add ASUS Pro B550M-C/PRIME B550M-A MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ASUS Pro B550M-C/PRIME B550M-A boards have got an nct6775 chip, but by default there's no use of it because of resource conflict with WMI method. This commit adds "Pro B550M-C" and "PRIME B550M-A" to the list of boards that can be monitored using ASUS WMI. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=204807 Signed-off-by: Denis Pauk Tested-by: Gregory P. 
Smith Tested-by: Joel Wirāmu Tested-by: Jonathan Farrugia Link: https://lore.kernel.org/r/20220112215013.11694-1-pauk.denis@gmail.com Signed-off-by: Guenter Roeck --- drivers/hwmon/nct6775.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index 63e15b5f93a0e..4ef468b413a1f 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -5102,9 +5102,12 @@ static struct platform_device *pdev[2]; static const char * const asus_wmi_boards[] = { "ProArt X570-CREATOR WIFI", + "Pro B550M-C", "Pro WS X570-ACE", "PRIME B360-PLUS", "PRIME B460-PLUS", + "PRIME B550M-A", + "PRIME B550M-A (WI-FI)", "PRIME X570-PRO", "ROG CROSSHAIR VIII DARK HERO", "ROG CROSSHAIR VIII FORMULA", -- GitLab From de00c068c4ac9391f3c604ad2f06ac1e73005754 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 10 Jan 2022 05:47:12 +0300 Subject: [PATCH 0723/1586] hwmon: (nct6775) add PRIME B550-PLUS motherboard to whitelist Asus PRIME B550-PLUS motherboards have got an nct6775 chip. Its resource range is covered by the \AMW0.SHWM OpRegion, so the chip is unusable when using SIO. However ASUS WMI access works. Add PRIME B550-PLUS to the list of motherboards using ASUS WMI to read data. 
Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20220110024712.753492-1-dmitry.baryshkov@linaro.org Signed-off-by: Guenter Roeck --- drivers/hwmon/nct6775.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index 4ef468b413a1f..fbf6266c7ba7b 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -5106,6 +5106,7 @@ static const char * const asus_wmi_boards[] = { "Pro WS X570-ACE", "PRIME B360-PLUS", "PRIME B460-PLUS", + "PRIME B550-PLUS", "PRIME B550M-A", "PRIME B550M-A (WI-FI)", "PRIME X570-PRO", -- GitLab From 1b089084ec654c05df202896d54df8d92b16fc1e Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Sun, 9 Jan 2022 22:42:46 +0100 Subject: [PATCH 0724/1586] Documentation: admin-guide: Update i8k driver name The driver should be called dell_smm_hwmon, i8k is only an alias now. Signed-off-by: Armin Wolf Acked-by: Randy Dunlap Link: https://lore.kernel.org/r/20220109214248.61759-2-W_Armin@gmx.de Signed-off-by: Guenter Roeck --- .../admin-guide/kernel-parameters.txt | 29 ++++++++++++------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index f5a27f067db9e..7189402e9fc7c 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -944,6 +944,24 @@ dump out devices still on the deferred probe list after retrying. + dell_smm_hwmon.ignore_dmi= + [HW] Continue probing hardware even if DMI data + indicates that the driver is running on unsupported + hardware. + + dell_smm_hwmon.force= + [HW] Activate driver even if SMM BIOS signature does + not match list of supported models and enable otherwise + blacklisted features. + + dell_smm_hwmon.power_status= + [HW] Report power status in /proc/i8k + (disabled by default). + + dell_smm_hwmon.restricted= + [HW] Allow controlling fans only if SYS_ADMIN + capability is set. 
+ dfltcc= [HW,S390] Format: { on | off | def_only | inf_only | always } on: s390 zlib hardware support for compression on @@ -1703,17 +1721,6 @@ i810= [HW,DRM] - i8k.ignore_dmi [HW] Continue probing hardware even if DMI data - indicates that the driver is running on unsupported - hardware. - i8k.force [HW] Activate i8k driver even if SMM BIOS signature - does not match list of supported models. - i8k.power_status - [HW] Report power status in /proc/i8k - (disabled by default) - i8k.restricted [HW] Allow controlling fans only if SYS_ADMIN - capability is set. - i915.invert_brightness= [DRM] Invert the sense of the variable that is used to set the brightness of the panel backlight. Normally a -- GitLab From 99fdc5875b005b6014be2bd2a7c6aaf0abe2896d Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Sun, 9 Jan 2022 22:42:47 +0100 Subject: [PATCH 0725/1586] Documentation: admin-guide: Add Documentation for undocumented dell_smm_hwmon parameters Add documentation for fan_mult and fan_max. Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20220109214248.61759-3-W_Armin@gmx.de Signed-off-by: Guenter Roeck --- Documentation/admin-guide/kernel-parameters.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 7189402e9fc7c..d68053db21cc6 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -962,6 +962,12 @@ [HW] Allow controlling fans only if SYS_ADMIN capability is set. + dell_smm_hwmon.fan_mult= + [HW] Factor to multiply fan speed with. + + dell_smm_hwmon.fan_max= + [HW] Maximum configurable fan speed. 
+ dfltcc= [HW,S390] Format: { on | off | def_only | inf_only | always } on: s390 zlib hardware support for compression on -- GitLab From 34781a6bec382dd11254caff0d379c7be70576fd Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Sun, 9 Jan 2022 22:42:48 +0100 Subject: [PATCH 0726/1586] Documentation: ABI: Add ABI file for legacy /proc/i8k interface Add ABI file for informing remaining users of the deprecation of the legacy /proc/i8k interface. Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20220109214248.61759-4-W_Armin@gmx.de Signed-off-by: Guenter Roeck --- Documentation/ABI/obsolete/procfs-i8k | 10 ++++++++++ MAINTAINERS | 1 + 2 files changed, 11 insertions(+) create mode 100644 Documentation/ABI/obsolete/procfs-i8k diff --git a/Documentation/ABI/obsolete/procfs-i8k b/Documentation/ABI/obsolete/procfs-i8k new file mode 100644 index 0000000000000..32df4d5bdd153 --- /dev/null +++ b/Documentation/ABI/obsolete/procfs-i8k @@ -0,0 +1,10 @@ +What: /proc/i8k +Date: November 2001 +KernelVersion: 2.4.14 +Contact: Pali Rohár +Description: Legacy interface for getting/setting sensor information like + fan speed, temperature, serial number, hotkey status etc + on Dell Laptops. + Since the driver is now using the standard hwmon sysfs interface, + the procfs interface is deprecated. 
+Users: https://github.com/vitorafsr/i8kutils diff --git a/MAINTAINERS b/MAINTAINERS index 1ba1e4af2cbc8..293c925b6beb7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5410,6 +5410,7 @@ F: drivers/platform/x86/dell/dell-rbtn.* DELL LAPTOP SMM DRIVER M: Pali Rohár S: Maintained +F: Documentation/ABI/obsolete/procfs-i8k F: drivers/hwmon/dell-smm-hwmon.c F: include/uapi/linux/i8k.h -- GitLab From 848da7b58796a2583e88dfa0c974bcaa7e40fbec Mon Sep 17 00:00:00 2001 From: Anthony DeRossi Date: Mon, 10 Jan 2022 21:18:42 -0800 Subject: [PATCH 0727/1586] hwmon: (asus_wmi_ec_sensors) Support T_Sensor on Prime X570-Pro Asus Prime X570-Pro motherboards have a T_Sensor header that can be connected to an optional temperature probe. Signed-off-by: Anthony DeRossi Link: https://lore.kernel.org/r/20220111051842.25634-1-ajderossi@gmail.com Signed-off-by: Guenter Roeck --- drivers/hwmon/asus_wmi_ec_sensors.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/asus_wmi_ec_sensors.c b/drivers/hwmon/asus_wmi_ec_sensors.c index 22a1459305a79..a3a2f014dec03 100644 --- a/drivers/hwmon/asus_wmi_ec_sensors.c +++ b/drivers/hwmon/asus_wmi_ec_sensors.c @@ -112,7 +112,8 @@ struct asus_wmi_data { /* boards with EC support */ static struct asus_wmi_data sensors_board_PW_X570_P = { .known_board_sensors = { - SENSOR_TEMP_CHIPSET, SENSOR_TEMP_CPU, SENSOR_TEMP_MB, SENSOR_TEMP_VRM, + SENSOR_TEMP_CHIPSET, SENSOR_TEMP_CPU, SENSOR_TEMP_MB, + SENSOR_TEMP_T_SENSOR, SENSOR_TEMP_VRM, SENSOR_FAN_CHIPSET, SENSOR_MAX }, -- GitLab From 6dd0ea4c565f73dca55a20bc1ab1587a6587d687 Mon Sep 17 00:00:00 2001 From: Denis Pauk Date: Wed, 12 Jan 2022 23:49:17 +0200 Subject: [PATCH 0728/1586] hwmon: (asus_wmi_sensors) add ASUS ROG STRIX B450-F GAMING II ASUS ROG STRIX B450-F GAMING II has support of the same WMI monitoring method as ASUS ROG STRIX B450-F GAMING. This commit adds "ASUS ROG STRIX B450-F GAMING II" to the list of boards that can be monitored using ASUS WMI. 
BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=204807 Signed-off-by: Denis Pauk Tested-by: Aleksa Savic Link: https://lore.kernel.org/r/20220112214917.11662-1-pauk.denis@gmail.com Signed-off-by: Guenter Roeck --- drivers/hwmon/asus_wmi_sensors.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwmon/asus_wmi_sensors.c b/drivers/hwmon/asus_wmi_sensors.c index c80eee874b6c0..8fdcb62ae52de 100644 --- a/drivers/hwmon/asus_wmi_sensors.c +++ b/drivers/hwmon/asus_wmi_sensors.c @@ -77,6 +77,7 @@ static const struct dmi_system_id asus_wmi_dmi_table[] = { DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VII HERO (WI-FI)"), DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX B450-E GAMING"), DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX B450-F GAMING"), + DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX B450-F GAMING II"), DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX B450-I GAMING"), DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX X399-E GAMING"), DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX X470-F GAMING"), -- GitLab From e505e44fb09aa2024c7f1df0551e7c6b50b0f8db Mon Sep 17 00:00:00 2001 From: Marcello Sylvester Bauer Date: Mon, 17 Jan 2022 17:12:49 +0100 Subject: [PATCH 0729/1586] hwmon: (pmbus) Remove trailing whitespaces from Kconfig file Fix checkpatch issues by removing trailing whitespaces in Kconfig. 
Signed-off-by: Marcello Sylvester Bauer Link: https://lore.kernel.org/r/c984b88b136a1cde16ce52c5f818886653b0f84a.1642434222.git.sylv@sylv.io [groeck: Updated subject] Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/pmbus/Kconfig b/drivers/hwmon/pmbus/Kconfig index 41f6cbf96d3b9..c96f7b7338bd7 100644 --- a/drivers/hwmon/pmbus/Kconfig +++ b/drivers/hwmon/pmbus/Kconfig @@ -189,8 +189,8 @@ config SENSORS_LTC2978_REGULATOR depends on SENSORS_LTC2978 && REGULATOR help If you say yes here you get regulator support for Linear Technology - LTC3880, LTC3883, LTC3884, LTC3886, LTC3887, LTC3889, LTC7880, - LTM4644, LTM4675, LTM4676, LTM4677, LTM4678, LTM4680, LTM4686, + LTC3880, LTC3883, LTC3884, LTC3886, LTC3887, LTC3889, LTC7880, + LTM4644, LTM4675, LTM4676, LTM4677, LTM4678, LTM4680, LTM4686, and LTM4700. config SENSORS_LTC3815 -- GitLab From f86380b797834bed6cd55c6c1f49a590356ec828 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 18 Jan 2022 19:55:13 -0600 Subject: [PATCH 0730/1586] dt-bindings: hwmon: lm90: Drop Tegra specifics from example There's no need to complicate examples with a platform specific macro. It also complicates example parsing to figure out the number of interrupt cells in examples (based on bracketing). 
Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20220119015514.2441231-1-robh@kernel.org Signed-off-by: Guenter Roeck --- Documentation/devicetree/bindings/hwmon/national,lm90.yaml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/hwmon/national,lm90.yaml b/Documentation/devicetree/bindings/hwmon/national,lm90.yaml index 6e1d54ff5d5b6..30db92977937b 100644 --- a/Documentation/devicetree/bindings/hwmon/national,lm90.yaml +++ b/Documentation/devicetree/bindings/hwmon/national,lm90.yaml @@ -60,7 +60,6 @@ additionalProperties: false examples: - | - #include #include i2c { @@ -71,8 +70,7 @@ examples: compatible = "onnn,nct1008"; reg = <0x4c>; vcc-supply = <&palmas_ldo6_reg>; - interrupt-parent = <&gpio>; - interrupts = ; + interrupts = <4 IRQ_TYPE_LEVEL_LOW>; #thermal-sensor-cells = <1>; }; }; -- GitLab From 15b1c188f8cf2ff9f296c9781d1e4fc061aaf371 Mon Sep 17 00:00:00 2001 From: Michael Shych Date: Tue, 18 Jan 2022 09:56:09 +0200 Subject: [PATCH 0731/1586] hwmon: (powr1220) Cosmetic changes Update code alignments. Signed-off-by: Michael Shych Link: https://lore.kernel.org/r/20220118075611.10665-2-michaelsh@nvidia.com Signed-off-by: Guenter Roeck --- drivers/hwmon/powr1220.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/powr1220.c b/drivers/hwmon/powr1220.c index 9e086338dcbaa..542e4a7b02341 100644 --- a/drivers/hwmon/powr1220.c +++ b/drivers/hwmon/powr1220.c @@ -111,7 +111,7 @@ static int powr1220_read_adc(struct device *dev, int ch_num) mutex_lock(&data->update_lock); if (time_after(jiffies, data->adc_last_updated[ch_num] + HZ) || - !data->adc_valid[ch_num]) { + !data->adc_valid[ch_num]) { /* * figure out if we need to use the attenuator for * high inputs or inputs that we don't yet have a measurement @@ -119,12 +119,12 @@ static int powr1220_read_adc(struct device *dev, int ch_num) * max reading. 
*/ if (data->adc_maxes[ch_num] > ADC_MAX_LOW_MEASUREMENT_MV || - data->adc_maxes[ch_num] == 0) + data->adc_maxes[ch_num] == 0) adc_range = 1 << 4; /* set the attenuator and mux */ result = i2c_smbus_write_byte_data(data->client, ADC_MUX, - adc_range | ch_num); + adc_range | ch_num); if (result) goto exit; -- GitLab From 915d4664b7158d8d0f44da810186742c69300f02 Mon Sep 17 00:00:00 2001 From: Michael Shych Date: Tue, 18 Jan 2022 09:56:10 +0200 Subject: [PATCH 0732/1586] hwmon: (powr1220) Upgrade driver to support hwmon info infrastructure Reduce code by replacing the devm_hwmon_device_register_with_groups() API with the devm_hwmon_device_register_with_info() API. The motivation is to reduce code and to allow easy support for similar devices by the same driver. Signed-off-by: Michael Shych Reviewed-by: Vadim Pasternak Link: https://lore.kernel.org/r/20220118075611.10665-3-michaelsh@nvidia.com Signed-off-by: Guenter Roeck --- drivers/hwmon/powr1220.c | 207 +++++++++++++++++---------------------- 1 file changed, 92 insertions(+), 115 deletions(-) diff --git a/drivers/hwmon/powr1220.c b/drivers/hwmon/powr1220.c index 542e4a7b02341..0fa1a136eec84 100644 --- a/drivers/hwmon/powr1220.c +++ b/drivers/hwmon/powr1220.c @@ -167,135 +167,109 @@ exit: return result; } -/* Shows the voltage associated with the specified ADC channel */ -static ssize_t powr1220_voltage_show(struct device *dev, - struct device_attribute *dev_attr, - char *buf) +static umode_t +powr1220_is_visible(const void *data, enum hwmon_sensor_types type, u32 + attr, int channel) { - struct sensor_device_attribute *attr = to_sensor_dev_attr(dev_attr); - int adc_val = powr1220_read_adc(dev, attr->index); - - if (adc_val < 0) - return adc_val; + switch (type) { + case hwmon_in: + switch (attr) { + case hwmon_in_input: + case hwmon_in_highest: + case hwmon_in_label: + return 0444; + default: + break; + } + break; + default: + break; + } - return sprintf(buf, "%d\n", adc_val); + return 0; } -/* Shows the maximum setting
associated with the specified ADC channel */ -static ssize_t powr1220_max_show(struct device *dev, - struct device_attribute *dev_attr, char *buf) +static int +powr1220_read_string(struct device *dev, enum hwmon_sensor_types type, u32 attr, + int channel, const char **str) { - struct sensor_device_attribute *attr = to_sensor_dev_attr(dev_attr); - struct powr1220_data *data = dev_get_drvdata(dev); + switch (type) { + case hwmon_in: + switch (attr) { + case hwmon_in_label: + *str = input_names[channel]; + return 0; + default: + return -EOPNOTSUPP; + } + break; + default: + return -EOPNOTSUPP; + } - return sprintf(buf, "%d\n", data->adc_maxes[attr->index]); + return -EOPNOTSUPP; } -/* Shows the label associated with the specified ADC channel */ -static ssize_t powr1220_label_show(struct device *dev, - struct device_attribute *dev_attr, - char *buf) +static int +powr1220_read(struct device *dev, enum hwmon_sensor_types type, u32 + attr, int channel, long *val) { - struct sensor_device_attribute *attr = to_sensor_dev_attr(dev_attr); + struct powr1220_data *data = dev_get_drvdata(dev); + int ret; + + switch (type) { + case hwmon_in: + switch (attr) { + case hwmon_in_input: + ret = powr1220_read_adc(dev, channel); + if (ret < 0) + return ret; + *val = ret; + break; + case hwmon_in_highest: + *val = data->adc_maxes[channel]; + break; + default: + return -EOPNOTSUPP; + } + break; + default: + return -EOPNOTSUPP; +} - return sprintf(buf, "%s\n", input_names[attr->index]); + return 0; } -static SENSOR_DEVICE_ATTR_RO(in0_input, powr1220_voltage, VMON1); -static SENSOR_DEVICE_ATTR_RO(in1_input, powr1220_voltage, VMON2); -static SENSOR_DEVICE_ATTR_RO(in2_input, powr1220_voltage, VMON3); -static SENSOR_DEVICE_ATTR_RO(in3_input, powr1220_voltage, VMON4); -static SENSOR_DEVICE_ATTR_RO(in4_input, powr1220_voltage, VMON5); -static SENSOR_DEVICE_ATTR_RO(in5_input, powr1220_voltage, VMON6); -static SENSOR_DEVICE_ATTR_RO(in6_input, powr1220_voltage, VMON7); -static 
SENSOR_DEVICE_ATTR_RO(in7_input, powr1220_voltage, VMON8); -static SENSOR_DEVICE_ATTR_RO(in8_input, powr1220_voltage, VMON9); -static SENSOR_DEVICE_ATTR_RO(in9_input, powr1220_voltage, VMON10); -static SENSOR_DEVICE_ATTR_RO(in10_input, powr1220_voltage, VMON11); -static SENSOR_DEVICE_ATTR_RO(in11_input, powr1220_voltage, VMON12); -static SENSOR_DEVICE_ATTR_RO(in12_input, powr1220_voltage, VCCA); -static SENSOR_DEVICE_ATTR_RO(in13_input, powr1220_voltage, VCCINP); - -static SENSOR_DEVICE_ATTR_RO(in0_highest, powr1220_max, VMON1); -static SENSOR_DEVICE_ATTR_RO(in1_highest, powr1220_max, VMON2); -static SENSOR_DEVICE_ATTR_RO(in2_highest, powr1220_max, VMON3); -static SENSOR_DEVICE_ATTR_RO(in3_highest, powr1220_max, VMON4); -static SENSOR_DEVICE_ATTR_RO(in4_highest, powr1220_max, VMON5); -static SENSOR_DEVICE_ATTR_RO(in5_highest, powr1220_max, VMON6); -static SENSOR_DEVICE_ATTR_RO(in6_highest, powr1220_max, VMON7); -static SENSOR_DEVICE_ATTR_RO(in7_highest, powr1220_max, VMON8); -static SENSOR_DEVICE_ATTR_RO(in8_highest, powr1220_max, VMON9); -static SENSOR_DEVICE_ATTR_RO(in9_highest, powr1220_max, VMON10); -static SENSOR_DEVICE_ATTR_RO(in10_highest, powr1220_max, VMON11); -static SENSOR_DEVICE_ATTR_RO(in11_highest, powr1220_max, VMON12); -static SENSOR_DEVICE_ATTR_RO(in12_highest, powr1220_max, VCCA); -static SENSOR_DEVICE_ATTR_RO(in13_highest, powr1220_max, VCCINP); - -static SENSOR_DEVICE_ATTR_RO(in0_label, powr1220_label, VMON1); -static SENSOR_DEVICE_ATTR_RO(in1_label, powr1220_label, VMON2); -static SENSOR_DEVICE_ATTR_RO(in2_label, powr1220_label, VMON3); -static SENSOR_DEVICE_ATTR_RO(in3_label, powr1220_label, VMON4); -static SENSOR_DEVICE_ATTR_RO(in4_label, powr1220_label, VMON5); -static SENSOR_DEVICE_ATTR_RO(in5_label, powr1220_label, VMON6); -static SENSOR_DEVICE_ATTR_RO(in6_label, powr1220_label, VMON7); -static SENSOR_DEVICE_ATTR_RO(in7_label, powr1220_label, VMON8); -static SENSOR_DEVICE_ATTR_RO(in8_label, powr1220_label, VMON9); -static 
SENSOR_DEVICE_ATTR_RO(in9_label, powr1220_label, VMON10); -static SENSOR_DEVICE_ATTR_RO(in10_label, powr1220_label, VMON11); -static SENSOR_DEVICE_ATTR_RO(in11_label, powr1220_label, VMON12); -static SENSOR_DEVICE_ATTR_RO(in12_label, powr1220_label, VCCA); -static SENSOR_DEVICE_ATTR_RO(in13_label, powr1220_label, VCCINP); - -static struct attribute *powr1220_attrs[] = { - &sensor_dev_attr_in0_input.dev_attr.attr, - &sensor_dev_attr_in1_input.dev_attr.attr, - &sensor_dev_attr_in2_input.dev_attr.attr, - &sensor_dev_attr_in3_input.dev_attr.attr, - &sensor_dev_attr_in4_input.dev_attr.attr, - &sensor_dev_attr_in5_input.dev_attr.attr, - &sensor_dev_attr_in6_input.dev_attr.attr, - &sensor_dev_attr_in7_input.dev_attr.attr, - &sensor_dev_attr_in8_input.dev_attr.attr, - &sensor_dev_attr_in9_input.dev_attr.attr, - &sensor_dev_attr_in10_input.dev_attr.attr, - &sensor_dev_attr_in11_input.dev_attr.attr, - &sensor_dev_attr_in12_input.dev_attr.attr, - &sensor_dev_attr_in13_input.dev_attr.attr, - - &sensor_dev_attr_in0_highest.dev_attr.attr, - &sensor_dev_attr_in1_highest.dev_attr.attr, - &sensor_dev_attr_in2_highest.dev_attr.attr, - &sensor_dev_attr_in3_highest.dev_attr.attr, - &sensor_dev_attr_in4_highest.dev_attr.attr, - &sensor_dev_attr_in5_highest.dev_attr.attr, - &sensor_dev_attr_in6_highest.dev_attr.attr, - &sensor_dev_attr_in7_highest.dev_attr.attr, - &sensor_dev_attr_in8_highest.dev_attr.attr, - &sensor_dev_attr_in9_highest.dev_attr.attr, - &sensor_dev_attr_in10_highest.dev_attr.attr, - &sensor_dev_attr_in11_highest.dev_attr.attr, - &sensor_dev_attr_in12_highest.dev_attr.attr, - &sensor_dev_attr_in13_highest.dev_attr.attr, - - &sensor_dev_attr_in0_label.dev_attr.attr, - &sensor_dev_attr_in1_label.dev_attr.attr, - &sensor_dev_attr_in2_label.dev_attr.attr, - &sensor_dev_attr_in3_label.dev_attr.attr, - &sensor_dev_attr_in4_label.dev_attr.attr, - &sensor_dev_attr_in5_label.dev_attr.attr, - &sensor_dev_attr_in6_label.dev_attr.attr, - &sensor_dev_attr_in7_label.dev_attr.attr, - 
&sensor_dev_attr_in8_label.dev_attr.attr, - &sensor_dev_attr_in9_label.dev_attr.attr, - &sensor_dev_attr_in10_label.dev_attr.attr, - &sensor_dev_attr_in11_label.dev_attr.attr, - &sensor_dev_attr_in12_label.dev_attr.attr, - &sensor_dev_attr_in13_label.dev_attr.attr, +static const struct hwmon_channel_info *powr1220_info[] = { + HWMON_CHANNEL_INFO(in, + HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL, + HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL, + HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL, + HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL, + HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL, + HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL, + HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL, + HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL, + HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL, + HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL, + HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL, + HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL, + HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL, + HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL), NULL }; -ATTRIBUTE_GROUPS(powr1220); +static const struct hwmon_ops powr1220_hwmon_ops = { + .read = powr1220_read, + .read_string = powr1220_read_string, + .is_visible = powr1220_is_visible, +}; + +static const struct hwmon_chip_info powr1220_chip_info = { + .ops = &powr1220_hwmon_ops, + .info = powr1220_info, +}; static int powr1220_probe(struct i2c_client *client) { @@ -312,8 +286,11 @@ static int powr1220_probe(struct i2c_client *client) mutex_init(&data->update_lock); data->client = client; - hwmon_dev = devm_hwmon_device_register_with_groups(&client->dev, - client->name, data, powr1220_groups); + hwmon_dev = devm_hwmon_device_register_with_info(&client->dev, + client->name, + data, + &powr1220_chip_info, + NULL); return PTR_ERR_OR_ZERO(hwmon_dev); } -- GitLab From 9f93aa1005fa1b960f10e0ee3ed8c4e697526053 Mon Sep 17 00:00:00 2001 From: Michael Shych Date: Tue, 18 Jan 2022 09:56:11 +0200 Subject: [PATCH 0733/1586] 
hwmon: (powr1220) Add support for Lattice's POWR1014 power manager IC This patch adds support for Lattice's POWR1014 power manager IC. Read access to all the ADCs on the chip is supported through the "hwmon" "sysfs" files. The main differences of POWR1014 compared to POWR1220 are the number of VMON input lines: 10 on POWR1014 and 12 on POWR1220, and the number of output control signals: 14 on POWR1014 and 20 on POWR1220. Signed-off-by: Michael Shych Reviewed-by: Vadim Pasternak Link: https://lore.kernel.org/r/20220118075611.10665-4-michaelsh@nvidia.com Signed-off-by: Guenter Roeck --- drivers/hwmon/powr1220.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/powr1220.c b/drivers/hwmon/powr1220.c index 0fa1a136eec84..f77dc6db31ac5 100644 --- a/drivers/hwmon/powr1220.c +++ b/drivers/hwmon/powr1220.c @@ -22,6 +22,8 @@ #define ADC_STEP_MV 2 #define ADC_MAX_LOW_MEASUREMENT_MV 2000 +enum powr1xxx_chips { powr1014, powr1220 }; + enum powr1220_regs { VMON_STATUS0, VMON_STATUS1, @@ -74,6 +76,7 @@ enum powr1220_adc_values { struct powr1220_data { struct i2c_client *client; struct mutex update_lock; + u8 max_channels; bool adc_valid[MAX_POWR1220_ADC_VALUES]; /* the next value is in jiffies */ unsigned long adc_last_updated[MAX_POWR1220_ADC_VALUES]; @@ -171,6 +174,11 @@ static umode_t powr1220_is_visible(const void *data, enum hwmon_sensor_types type, u32 attr, int channel) { + struct powr1220_data *chip_data = (struct powr1220_data *)data; + + if (channel >= chip_data->max_channels) + return 0; + switch (type) { case hwmon_in: switch (attr) { @@ -271,6 +279,8 @@ static const struct hwmon_chip_info powr1220_chip_info = { .info = powr1220_info, }; +static const struct i2c_device_id powr1220_ids[]; + static int powr1220_probe(struct i2c_client *client) { struct powr1220_data *data; @@ -283,6 +293,15 @@ static int powr1220_probe(struct i2c_client *client) if (!data) return -ENOMEM; + switch (i2c_match_id(powr1220_ids,
client)->driver_data) { + case powr1014: + data->max_channels = 10; + break; + default: + data->max_channels = 12; + break; + } + mutex_init(&data->update_lock); data->client = client; @@ -296,7 +315,8 @@ static int powr1220_probe(struct i2c_client *client) } static const struct i2c_device_id powr1220_ids[] = { - { "powr1220", 0, }, + { "powr1014", powr1014, }, + { "powr1220", powr1220, }, { } }; -- GitLab From 07320c91565658e117f2f86a190eec9bb64abeb6 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Mon, 31 Jan 2022 16:27:40 +0000 Subject: [PATCH 0734/1586] hwmon: Fix possible NULL pointer The recent addition of the label attribute added some code that read the "label" device property, without checking first that "dev" was non-NULL. Fix this issue by first checking that "dev" is non-NULL. Fixes: ccd98cba6a18 ("hwmon: Add "label" attribute") Signed-off-by: Paul Cercueil Signed-off-by: Guenter Roeck --- drivers/hwmon/hwmon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c index 6c3a8c65390d8..0d6c6809f26cc 100644 --- a/drivers/hwmon/hwmon.c +++ b/drivers/hwmon/hwmon.c @@ -809,7 +809,7 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata, hdev->groups = groups; } - if (device_property_present(dev, "label")) { + if (dev && device_property_present(dev, "label")) { err = device_property_read_string(dev, "label", &label); if (err < 0) goto free_hwmon; -- GitLab From 4db3c09228a0899cdd791b4e34cd102a3288fd8e Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Mon, 31 Jan 2022 22:19:32 +0100 Subject: [PATCH 0735/1586] hwmon: (sch56xx) Autoload modules on platform device creation Right now, when sch56xx-common has detected a SCH5627/SCH5636 superio chip, the corresponding module is not automatically loaded. Fix that by adding the necessary device tables to both modules. Tested on a Fujitsu Esprimo P720. 
Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20220131211935.3656-2-W_Armin@gmx.de Reviewed-by: Hans de Goede Signed-off-by: Guenter Roeck --- drivers/hwmon/sch5627.c | 10 ++++++++++ drivers/hwmon/sch5636.c | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/drivers/hwmon/sch5627.c b/drivers/hwmon/sch5627.c index 8f1b569c69e7b..72c3f6757e348 100644 --- a/drivers/hwmon/sch5627.c +++ b/drivers/hwmon/sch5627.c @@ -7,6 +7,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include @@ -456,11 +457,20 @@ static int sch5627_probe(struct platform_device *pdev) return 0; } +static const struct platform_device_id sch5627_device_id[] = { + { + .name = "sch5627", + }, + { } +}; +MODULE_DEVICE_TABLE(platform, sch5627_device_id); + static struct platform_driver sch5627_driver = { .driver = { .name = DRVNAME, }, .probe = sch5627_probe, + .id_table = sch5627_device_id, }; module_platform_driver(sch5627_driver); diff --git a/drivers/hwmon/sch5636.c b/drivers/hwmon/sch5636.c index 39ff1c9b1df54..269757bc3a9e0 100644 --- a/drivers/hwmon/sch5636.c +++ b/drivers/hwmon/sch5636.c @@ -7,6 +7,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include @@ -501,12 +502,21 @@ error: return err; } +static const struct platform_device_id sch5636_device_id[] = { + { + .name = "sch5636", + }, + { } +}; +MODULE_DEVICE_TABLE(platform, sch5636_device_id); + static struct platform_driver sch5636_driver = { .driver = { .name = DRVNAME, }, .probe = sch5636_probe, .remove = sch5636_remove, + .id_table = sch5636_device_id, }; module_platform_driver(sch5636_driver); -- GitLab From 393935baa45e5ccb9603cf7f9f020ed1bc0915f7 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Mon, 31 Jan 2022 22:19:33 +0100 Subject: [PATCH 0736/1586] hwmon: (sch56xx-common) Add automatic module loading on supported devices This patch enables the sch56xx-common module to get automatically loaded on supported machines. 
If a machine supports Fujitsu's SCH56XX-based hardware monitoring solutions, it contains a "Antiope"/" Antiope" dmi onboard device in case of the sch5627 or a "Theseus"/" Theseus" dmi onboard device in case of the sch5636. Since some machines like the Esprimo C700 have a seemingly faulty DMI table containing both onboard devices, the driver still needs to probe for the individual superio chip, which in presence of at least one DMI onboard device however can be considered safe. Also add a module parameter allowing for bypassing the DMI check. Tested on a Fujitsu Esprimo P720. Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20220131211935.3656-3-W_Armin@gmx.de Reviewed-by: Hans de Goede Signed-off-by: Guenter Roeck --- drivers/hwmon/sch56xx-common.c | 40 ++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/sch56xx-common.c b/drivers/hwmon/sch56xx-common.c index 40cdadad35e52..0172aa16dc0cc 100644 --- a/drivers/hwmon/sch56xx-common.c +++ b/drivers/hwmon/sch56xx-common.c @@ -7,8 +7,10 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include +#include #include #include #include @@ -19,7 +21,10 @@ #include #include "sch56xx-common.h" -/* Insmod parameters */ +static bool ignore_dmi; +module_param(ignore_dmi, bool, 0); +MODULE_PARM_DESC(ignore_dmi, "Omit DMI check for supported devices (default=0)"); + static bool nowayout = WATCHDOG_NOWAYOUT; module_param(nowayout, bool, 0); MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" @@ -518,11 +523,42 @@ static int __init sch56xx_device_add(int address, const char *name) return PTR_ERR_OR_ZERO(sch56xx_pdev); } +/* For autoloading only */ +static const struct dmi_system_id sch56xx_dmi_table[] __initconst = { + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + }, + }, + { } +}; +MODULE_DEVICE_TABLE(dmi, sch56xx_dmi_table); + static int __init sch56xx_init(void) { - int address; const char *name = 
NULL; + int address; + if (!ignore_dmi) { + if (!dmi_check_system(sch56xx_dmi_table)) + return -ENODEV; + + /* + * Some machines like the Esprimo P720 and Esprimo C700 have + * onboard devices named " Antiope"/" Theseus" instead of + * "Antiope"/"Theseus", so we need to check for both. + */ + if (!dmi_find_device(DMI_DEV_TYPE_OTHER, "Antiope", NULL) && + !dmi_find_device(DMI_DEV_TYPE_OTHER, " Antiope", NULL) && + !dmi_find_device(DMI_DEV_TYPE_OTHER, "Theseus", NULL) && + !dmi_find_device(DMI_DEV_TYPE_OTHER, " Theseus", NULL)) + return -ENODEV; + } + + /* + * Some devices like the Esprimo C700 have both onboard devices, + * so we still have to check manually + */ address = sch56xx_find(0x4e, &name); if (address < 0) address = sch56xx_find(0x2e, &name); -- GitLab From 799c3e1e5348effda4312764bb484a084d561949 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Mon, 31 Jan 2022 22:19:34 +0100 Subject: [PATCH 0737/1586] hwmon: (sch56xx-common) Replace msleep() with usleep_range() msleep(1) will often sleep more than 20ms, slowing down sensor and watchdog reads/writes. Use usleep_range() as recommended in timers-howto.rst to fix that. Tested on a Fujitsu Esprimo P720. 
Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20220131211935.3656-4-W_Armin@gmx.de Reviewed-by: Hans de Goede Signed-off-by: Guenter Roeck --- drivers/hwmon/sch56xx-common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/sch56xx-common.c b/drivers/hwmon/sch56xx-common.c index 0172aa16dc0cc..82602b74c7edb 100644 --- a/drivers/hwmon/sch56xx-common.c +++ b/drivers/hwmon/sch56xx-common.c @@ -139,7 +139,7 @@ static int sch56xx_send_cmd(u16 addr, u8 cmd, u16 reg, u8 v) /* EM Interface Polling "Algorithm" */ for (i = 0; i < max_busy_polls + max_lazy_polls; i++) { if (i >= max_busy_polls) - msleep(1); + usleep_range(1000, 2000); /* Read Interrupt source Register */ val = inb(addr + 8); /* Write Clear the interrupt source bits */ -- GitLab From 647d6f09bea7dacf4cdb6d4ea7e3051883955297 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Mon, 31 Jan 2022 22:19:35 +0100 Subject: [PATCH 0738/1586] hwmon: (sch56xx-common) Replace WDOG_ACTIVE with WDOG_HW_RUNNING If the watchdog was already enabled by the BIOS after booting, the watchdog infrastructure needs to regularly send keepalives to prevent an unexpected reset. WDOG_ACTIVE only serves as a status indicator for userspace; we want to use WDOG_HW_RUNNING instead. Since my Fujitsu Esprimo P720 does not support the watchdog, this change is compile-tested only. 
Suggested-by: Guenter Roeck Fixes: fb551405c0f8 (watchdog: sch56xx: Use watchdog core) Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20220131211935.3656-5-W_Armin@gmx.de Reviewed-by: Hans de Goede Signed-off-by: Guenter Roeck --- drivers/hwmon/sch56xx-common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/sch56xx-common.c b/drivers/hwmon/sch56xx-common.c index 82602b74c7edb..3ece53adabd62 100644 --- a/drivers/hwmon/sch56xx-common.c +++ b/drivers/hwmon/sch56xx-common.c @@ -427,7 +427,7 @@ void sch56xx_watchdog_register(struct device *parent, u16 addr, u32 revision, data->wddev.max_timeout = 255 * 60; watchdog_set_nowayout(&data->wddev, nowayout); if (output_enable & SCH56XX_WDOG_OUTPUT_ENABLE) - set_bit(WDOG_ACTIVE, &data->wddev.status); + set_bit(WDOG_HW_RUNNING, &data->wddev.status); /* Since the watchdog uses a downcounter there is no register to read the BIOS set timeout from (if any was set at all) -> -- GitLab From d0ddfd241e5719d696bc0b081e260db69d368668 Mon Sep 17 00:00:00 2001 From: Eugene Shalygin Date: Mon, 24 Jan 2022 02:56:43 +0100 Subject: [PATCH 0739/1586] hwmon: (asus-ec-sensors) add driver for ASUS EC This driver provides the same data as the asus_wmi_ec_sensors driver (and gets it from the same source) but does not use WMI, polling the ACPI EC directly. That provides two enhancements: sensor reading became quicker (on some systems or kernel configuration it took almost a full second to read all the sensors, that transfers less than 15 bytes of data), the driver became more flexible. The driver now relies on ACPI mutex to lock access to the EC in the same way as the WMI code does. 
Signed-off-by: Eugene Shalygin Link: https://lore.kernel.org/r/20220124015658.687309-2-eugene.shalygin@gmail.com Tested-by: Oleksandr Natalenko Tested-by: Denis Pauk Signed-off-by: Guenter Roeck --- MAINTAINERS | 6 + drivers/hwmon/Kconfig | 11 + drivers/hwmon/Makefile | 1 + drivers/hwmon/asus-ec-sensors.c | 694 ++++++++++++++++++++++++++++++++ 4 files changed, 712 insertions(+) create mode 100644 drivers/hwmon/asus-ec-sensors.c diff --git a/MAINTAINERS b/MAINTAINERS index 293c925b6beb7..27a87a6646357 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3056,6 +3056,12 @@ L: linux-hwmon@vger.kernel.org S: Maintained F: drivers/hwmon/asus_wmi_ec_sensors.c +ASUS EC HARDWARE MONITOR DRIVER +M: Eugene Shalygin +L: linux-hwmon@vger.kernel.org +S: Maintained +F: drivers/hwmon/asus-ec-sensors.c + ASUS WIRELESS RADIO CONTROL DRIVER M: João Paulo Rechi Vita L: platform-driver-x86@vger.kernel.org diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 01ab80a2cc4a1..b75ec220da5ad 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -2263,6 +2263,17 @@ config SENSORS_ASUS_WMI_EC This driver can also be built as a module. If so, the module will be called asus_wmi_sensors_ec. +config SENSORS_ASUS_EC + tristate "ASUS EC Sensors" + help + If you say yes here you get support for the ACPI embedded controller + hardware monitoring interface found in ASUS motherboards. The driver + currently supports B550/X570 boards, although other ASUS boards might + provide this monitoring interface as well. + + This driver can also be built as a module. If so, the module + will be called asus_ec_sensors. 
+ endif # ACPI endif # HWMON diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile index 185f946d698b0..7bba3415ca3df 100644 --- a/drivers/hwmon/Makefile +++ b/drivers/hwmon/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_HWMON_VID) += hwmon-vid.o # APCI drivers obj-$(CONFIG_SENSORS_ACPI_POWER) += acpi_power_meter.o obj-$(CONFIG_SENSORS_ATK0110) += asus_atk0110.o +obj-$(CONFIG_SENSORS_ASUS_EC) += asus-ec-sensors.o obj-$(CONFIG_SENSORS_ASUS_WMI) += asus_wmi_sensors.o obj-$(CONFIG_SENSORS_ASUS_WMI_EC) += asus_wmi_ec_sensors.o diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c new file mode 100644 index 0000000000000..7285334c7d801 --- /dev/null +++ b/drivers/hwmon/asus-ec-sensors.c @@ -0,0 +1,694 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * HWMON driver for ASUS motherboards that publish some sensor values + * via the embedded controller registers. + * + * Copyright (C) 2021 Eugene Shalygin + + * EC provides: + * - Chipset temperature + * - CPU temperature + * - Motherboard temperature + * - T_Sensor temperature + * - VRM temperature + * - Water In temperature + * - Water Out temperature + * - CPU Optional fan RPM + * - Chipset fan RPM + * - VRM Heat Sink fan RPM + * - Water Flow fan RPM + * - CPU current + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +static char *mutex_path_override; + +/* Writing to this EC register switches EC bank */ +#define ASUS_EC_BANK_REGISTER 0xff +#define SENSOR_LABEL_LEN 16 + +/* + * Arbitrary set max. allowed bank number. Required for sorting banks and + * currently is overkill with just 2 banks used at max, but for the sake + * of alignment let's set it to a higher value. 
+ */ +#define ASUS_EC_MAX_BANK 3 + +#define ACPI_LOCK_DELAY_MS 500 + +/* ACPI mutex for locking access to the EC for the firmware */ +#define ASUS_HW_ACCESS_MUTEX_ASMX "\\AMW0.ASMX" + +/* There are two variants of the vendor spelling */ +#define VENDOR_ASUS_UPPER_CASE "ASUSTeK COMPUTER INC." + +typedef union { + u32 value; + struct { + u8 index; + u8 bank; + u8 size; + u8 dummy; + } components; +} sensor_address; + +#define MAKE_SENSOR_ADDRESS(size, bank, index) { \ + .value = (size << 16) + (bank << 8) + index \ + } + +static u32 hwmon_attributes[hwmon_max] = { + [hwmon_chip] = HWMON_C_REGISTER_TZ, + [hwmon_temp] = HWMON_T_INPUT | HWMON_T_LABEL, + [hwmon_in] = HWMON_I_INPUT | HWMON_I_LABEL, + [hwmon_curr] = HWMON_C_INPUT | HWMON_C_LABEL, + [hwmon_fan] = HWMON_F_INPUT | HWMON_F_LABEL, +}; + +struct ec_sensor_info { + char label[SENSOR_LABEL_LEN]; + enum hwmon_sensor_types type; + sensor_address addr; +}; + +#define EC_SENSOR(sensor_label, sensor_type, size, bank, index) { \ + .label = sensor_label, .type = sensor_type, \ + .addr = MAKE_SENSOR_ADDRESS(size, bank, index), \ + } + +enum ec_sensors { + /* chipset temperature [℃] */ + ec_sensor_temp_chipset, + /* CPU temperature [℃] */ + ec_sensor_temp_cpu, + /* motherboard temperature [℃] */ + ec_sensor_temp_mb, + /* "T_Sensor" temperature sensor reading [℃] */ + ec_sensor_temp_t_sensor, + /* VRM temperature [℃] */ + ec_sensor_temp_vrm, + /* CPU_Opt fan [RPM] */ + ec_sensor_fan_cpu_opt, + /* VRM heat sink fan [RPM] */ + ec_sensor_fan_vrm_hs, + /* Chipset fan [RPM] */ + ec_sensor_fan_chipset, + /* Water flow sensor reading [RPM] */ + ec_sensor_fan_water_flow, + /* CPU current [A] */ + ec_sensor_curr_cpu, + /* "Water_In" temperature sensor reading [℃] */ + ec_sensor_temp_water_in, + /* "Water_Out" temperature sensor reading [℃] */ + ec_sensor_temp_water_out, +}; + +#define SENSOR_TEMP_CHIPSET BIT(ec_sensor_temp_chipset) +#define SENSOR_TEMP_CPU BIT(ec_sensor_temp_cpu) +#define SENSOR_TEMP_MB BIT(ec_sensor_temp_mb) 
+#define SENSOR_TEMP_T_SENSOR BIT(ec_sensor_temp_t_sensor) +#define SENSOR_TEMP_VRM BIT(ec_sensor_temp_vrm) +#define SENSOR_FAN_CPU_OPT BIT(ec_sensor_fan_cpu_opt) +#define SENSOR_FAN_VRM_HS BIT(ec_sensor_fan_vrm_hs) +#define SENSOR_FAN_CHIPSET BIT(ec_sensor_fan_chipset) +#define SENSOR_FAN_WATER_FLOW BIT(ec_sensor_fan_water_flow) +#define SENSOR_CURR_CPU BIT(ec_sensor_curr_cpu) +#define SENSOR_TEMP_WATER_IN BIT(ec_sensor_temp_water_in) +#define SENSOR_TEMP_WATER_OUT BIT(ec_sensor_temp_water_out) + +/* All the known sensors for ASUS EC controllers */ +static const struct ec_sensor_info known_ec_sensors[] = { + [ec_sensor_temp_chipset] = + EC_SENSOR("Chipset", hwmon_temp, 1, 0x00, 0x3a), + [ec_sensor_temp_cpu] = EC_SENSOR("CPU", hwmon_temp, 1, 0x00, 0x3b), + [ec_sensor_temp_mb] = + EC_SENSOR("Motherboard", hwmon_temp, 1, 0x00, 0x3c), + [ec_sensor_temp_t_sensor] = + EC_SENSOR("T_Sensor", hwmon_temp, 1, 0x00, 0x3d), + [ec_sensor_temp_vrm] = EC_SENSOR("VRM", hwmon_temp, 1, 0x00, 0x3e), + [ec_sensor_fan_cpu_opt] = + EC_SENSOR("CPU_Opt", hwmon_fan, 2, 0x00, 0xb0), + [ec_sensor_fan_vrm_hs] = EC_SENSOR("VRM HS", hwmon_fan, 2, 0x00, 0xb2), + [ec_sensor_fan_chipset] = + EC_SENSOR("Chipset", hwmon_fan, 2, 0x00, 0xb4), + [ec_sensor_fan_water_flow] = + EC_SENSOR("Water_Flow", hwmon_fan, 2, 0x00, 0xbc), + [ec_sensor_curr_cpu] = EC_SENSOR("CPU", hwmon_curr, 1, 0x00, 0xf4), + [ec_sensor_temp_water_in] = + EC_SENSOR("Water_In", hwmon_temp, 1, 0x01, 0x00), + [ec_sensor_temp_water_out] = + EC_SENSOR("Water_Out", hwmon_temp, 1, 0x01, 0x01), +}; + +/* Shortcuts for common combinations */ +#define SENSOR_SET_TEMP_CHIPSET_CPU_MB \ + (SENSOR_TEMP_CHIPSET | SENSOR_TEMP_CPU | SENSOR_TEMP_MB) +#define SENSOR_SET_TEMP_WATER (SENSOR_TEMP_WATER_IN | SENSOR_TEMP_WATER_OUT) + +#define DMI_EXACT_MATCH_BOARD(vendor, name, sensors) { \ + .matches = { \ + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, vendor), \ + DMI_EXACT_MATCH(DMI_BOARD_NAME, name), \ + }, \ + .driver_data = (void *)(sensors), \ +} + +static 
const struct dmi_system_id asus_ec_dmi_table[] __initconst = { + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "PRIME X570-PRO", + SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_VRM | + SENSOR_TEMP_T_SENSOR | SENSOR_FAN_CHIPSET), + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "Pro WS X570-ACE", + SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_VRM | + SENSOR_FAN_CHIPSET | SENSOR_CURR_CPU), + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, + "ROG CROSSHAIR VIII DARK HERO", + SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR | + SENSOR_TEMP_VRM | SENSOR_SET_TEMP_WATER | + SENSOR_FAN_CPU_OPT | SENSOR_FAN_WATER_FLOW | SENSOR_CURR_CPU), + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, + "ROG CROSSHAIR VIII FORMULA", + SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR | + SENSOR_TEMP_VRM | SENSOR_FAN_CPU_OPT | SENSOR_FAN_CHIPSET | + SENSOR_CURR_CPU), + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG CROSSHAIR VIII HERO", + SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR | + SENSOR_TEMP_VRM | SENSOR_SET_TEMP_WATER | + SENSOR_FAN_CPU_OPT | SENSOR_FAN_CHIPSET | + SENSOR_FAN_WATER_FLOW | SENSOR_CURR_CPU), + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, + "ROG CROSSHAIR VIII IMPACT", + SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR | + SENSOR_TEMP_VRM | SENSOR_FAN_CHIPSET | SENSOR_CURR_CPU), + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX B550-E GAMING", + SENSOR_SET_TEMP_CHIPSET_CPU_MB | + SENSOR_TEMP_T_SENSOR | + SENSOR_TEMP_VRM | SENSOR_FAN_CPU_OPT), + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX B550-I GAMING", + SENSOR_SET_TEMP_CHIPSET_CPU_MB | + SENSOR_TEMP_T_SENSOR | + SENSOR_TEMP_VRM | SENSOR_FAN_VRM_HS | SENSOR_CURR_CPU), + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX X570-E GAMING", + SENSOR_SET_TEMP_CHIPSET_CPU_MB | + SENSOR_TEMP_T_SENSOR | + SENSOR_TEMP_VRM | SENSOR_FAN_CHIPSET | SENSOR_CURR_CPU), + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX X570-F GAMING", + SENSOR_SET_TEMP_CHIPSET_CPU_MB | + 
SENSOR_TEMP_T_SENSOR | SENSOR_FAN_CHIPSET), + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX X570-I GAMING", + SENSOR_TEMP_T_SENSOR | SENSOR_FAN_VRM_HS | + SENSOR_FAN_CHIPSET | SENSOR_CURR_CPU), + {} +}; + +struct ec_sensor { + unsigned int info_index; + u32 cached_value; +}; + +struct ec_sensors_data { + unsigned long board_sensors; + struct ec_sensor *sensors; + /* EC registers to read from */ + u16 *registers; + u8 *read_buffer; + /* sorted list of unique register banks */ + u8 banks[ASUS_EC_MAX_BANK + 1]; + /* in jiffies */ + unsigned long last_updated; + acpi_handle aml_mutex; + /* number of board EC sensors */ + u8 nr_sensors; + /* + * number of EC registers to read + * (sensor might span more than 1 register) + */ + u8 nr_registers; + /* number of unique register banks */ + u8 nr_banks; +}; + +static u8 register_bank(u16 reg) +{ + return reg >> 8; +} + +static u8 register_index(u16 reg) +{ + return reg & 0x00ff; +} + +static const struct ec_sensor_info * +get_sensor_info(const struct ec_sensors_data *state, int index) +{ + return &known_ec_sensors[state->sensors[index].info_index]; +} + +static int find_ec_sensor_index(const struct ec_sensors_data *ec, + enum hwmon_sensor_types type, int channel) +{ + unsigned int i; + + for (i = 0; i < ec->nr_sensors; i++) { + if (get_sensor_info(ec, i)->type == type) { + if (channel == 0) + return i; + channel--; + } + } + return -ENOENT; +} + +static int __init bank_compare(const void *a, const void *b) +{ + return *((const s8 *)a) - *((const s8 *)b); +} + +static int __init board_sensors_count(unsigned long sensors) +{ + return hweight_long(sensors); +} + +static void __init setup_sensor_data(struct ec_sensors_data *ec) +{ + struct ec_sensor *s = ec->sensors; + bool bank_found; + int i, j; + u8 bank; + + ec->nr_banks = 0; + ec->nr_registers = 0; + + for_each_set_bit(i, &ec->board_sensors, + BITS_PER_TYPE(ec->board_sensors)) { + s->info_index = i; + s->cached_value = 0; + ec->nr_registers += + 
known_ec_sensors[s->info_index].addr.components.size; + bank_found = false; + bank = known_ec_sensors[s->info_index].addr.components.bank; + for (j = 0; j < ec->nr_banks; j++) { + if (ec->banks[j] == bank) { + bank_found = true; + break; + } + } + if (!bank_found) { + ec->banks[ec->nr_banks++] = bank; + } + s++; + } + sort(ec->banks, ec->nr_banks, 1, bank_compare, NULL); +} + +static void __init fill_ec_registers(struct ec_sensors_data *ec) +{ + const struct ec_sensor_info *si; + unsigned int i, j, register_idx = 0; + + for (i = 0; i < ec->nr_sensors; ++i) { + si = get_sensor_info(ec, i); + for (j = 0; j < si->addr.components.size; ++j, ++register_idx) { + ec->registers[register_idx] = + (si->addr.components.bank << 8) + + si->addr.components.index + j; + } + } +} + +static acpi_handle __init asus_hw_access_mutex(struct device *dev) +{ + const char *mutex_path; + acpi_handle res; + int status; + + mutex_path = mutex_path_override ? + mutex_path_override : ASUS_HW_ACCESS_MUTEX_ASMX; + + status = acpi_get_handle(NULL, (acpi_string)mutex_path, &res); + if (ACPI_FAILURE(status)) { + dev_err(dev, + "Could not get hardware access guard mutex '%s': error %d", + mutex_path, status); + return NULL; + } + return res; +} + +static int asus_ec_bank_switch(u8 bank, u8 *old) +{ + int status = 0; + + if (old) { + status = ec_read(ASUS_EC_BANK_REGISTER, old); + } + if (status || (old && (*old == bank))) + return status; + return ec_write(ASUS_EC_BANK_REGISTER, bank); +} + +static int asus_ec_block_read(const struct device *dev, + struct ec_sensors_data *ec) +{ + int ireg, ibank, status; + u8 bank, reg_bank, prev_bank; + + bank = 0; + status = asus_ec_bank_switch(bank, &prev_bank); + if (status) { + dev_warn(dev, "EC bank switch failed"); + return status; + } + + if (prev_bank) { + /* oops... somebody else is working with the EC too */ + dev_warn(dev, + "Concurrent access to the ACPI EC detected.\nRace condition possible."); + } + + /* read registers minimizing bank switches. 
*/ + for (ibank = 0; ibank < ec->nr_banks; ibank++) { + if (bank != ec->banks[ibank]) { + bank = ec->banks[ibank]; + if (asus_ec_bank_switch(bank, NULL)) { + dev_warn(dev, "EC bank switch to %d failed", + bank); + break; + } + } + for (ireg = 0; ireg < ec->nr_registers; ireg++) { + reg_bank = register_bank(ec->registers[ireg]); + if (reg_bank < bank) { + continue; + } + ec_read(register_index(ec->registers[ireg]), + ec->read_buffer + ireg); + } + } + + status = asus_ec_bank_switch(prev_bank, NULL); + return status; +} + +static inline u32 get_sensor_value(const struct ec_sensor_info *si, u8 *data) +{ + switch (si->addr.components.size) { + case 1: + return *data; + case 2: + return get_unaligned_be16(data); + case 4: + return get_unaligned_be32(data); + default: + return 0; + } +} + +static void update_sensor_values(struct ec_sensors_data *ec, u8 *data) +{ + const struct ec_sensor_info *si; + struct ec_sensor *s; + + for (s = ec->sensors; s != ec->sensors + ec->nr_sensors; s++) { + si = &known_ec_sensors[s->info_index]; + s->cached_value = get_sensor_value(si, data); + data += si->addr.components.size; + } +} + +static int update_ec_sensors(const struct device *dev, + struct ec_sensors_data *ec) +{ + int status; + + /* + * ASUS DSDT does not specify that access to the EC has to be guarded, + * but firmware does access it via ACPI + */ + if (ACPI_FAILURE(acpi_acquire_mutex(ec->aml_mutex, NULL, + ACPI_LOCK_DELAY_MS))) { + dev_err(dev, "Failed to acquire AML mutex"); + status = -EBUSY; + goto cleanup; + } + + status = asus_ec_block_read(dev, ec); + + if (!status) { + update_sensor_values(ec, ec->read_buffer); + } + if (ACPI_FAILURE(acpi_release_mutex(ec->aml_mutex, NULL))) { + dev_err(dev, "Failed to release AML mutex"); + } +cleanup: + return status; +} + +static int scale_sensor_value(u32 value, int data_type) +{ + switch (data_type) { + case hwmon_curr: + case hwmon_temp: + case hwmon_in: + return value * MILLI; + default: + return value; + } +} + +static int 
get_cached_value_or_update(const struct device *dev, + int sensor_index, + struct ec_sensors_data *state, u32 *value) +{ + if (time_after(jiffies, state->last_updated + HZ)) { + if (update_ec_sensors(dev, state)) { + dev_err(dev, "update_ec_sensors() failure\n"); + return -EIO; + } + + state->last_updated = jiffies; + } + + *value = state->sensors[sensor_index].cached_value; + return 0; +} + +/* + * Now follow the functions that implement the hwmon interface + */ + +static int asus_ec_hwmon_read(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long *val) +{ + int ret; + u32 value = 0; + + struct ec_sensors_data *state = dev_get_drvdata(dev); + int sidx = find_ec_sensor_index(state, type, channel); + + if (sidx < 0) { + return sidx; + } + + ret = get_cached_value_or_update(dev, sidx, state, &value); + if (!ret) { + *val = scale_sensor_value(value, + get_sensor_info(state, sidx)->type); + } + + return ret; +} + +static int asus_ec_hwmon_read_string(struct device *dev, + enum hwmon_sensor_types type, u32 attr, + int channel, const char **str) +{ + struct ec_sensors_data *state = dev_get_drvdata(dev); + int sensor_index = find_ec_sensor_index(state, type, channel); + *str = get_sensor_info(state, sensor_index)->label; + + return 0; +} + +static umode_t asus_ec_hwmon_is_visible(const void *drvdata, + enum hwmon_sensor_types type, u32 attr, + int channel) +{ + const struct ec_sensors_data *state = drvdata; + + return find_ec_sensor_index(state, type, channel) >= 0 ? 
S_IRUGO : 0; +} + +static int __init +asus_ec_hwmon_add_chan_info(struct hwmon_channel_info *asus_ec_hwmon_chan, + struct device *dev, int num, + enum hwmon_sensor_types type, u32 config) +{ + int i; + u32 *cfg = devm_kcalloc(dev, num + 1, sizeof(*cfg), GFP_KERNEL); + + if (!cfg) + return -ENOMEM; + + asus_ec_hwmon_chan->type = type; + asus_ec_hwmon_chan->config = cfg; + for (i = 0; i < num; i++, cfg++) + *cfg = config; + + return 0; +} + +static const struct hwmon_ops asus_ec_hwmon_ops = { + .is_visible = asus_ec_hwmon_is_visible, + .read = asus_ec_hwmon_read, + .read_string = asus_ec_hwmon_read_string, +}; + +static struct hwmon_chip_info asus_ec_chip_info = { + .ops = &asus_ec_hwmon_ops, +}; + +static unsigned long __init +get_board_sensors(const struct device *dev) +{ + const struct dmi_system_id *dmi_entry; + + dmi_entry = dmi_first_match(asus_ec_dmi_table); + if (!dmi_entry) { + dev_info(dev, "Unsupported board"); + return 0; + } + + return (unsigned long)dmi_entry->driver_data; +} + +static int __init configure_sensor_setup(struct device *dev) +{ + struct ec_sensors_data *ec_data = dev_get_drvdata(dev); + int nr_count[hwmon_max] = { 0 }, nr_types = 0; + struct device *hwdev; + struct hwmon_channel_info *asus_ec_hwmon_chan; + const struct hwmon_channel_info **ptr_asus_ec_ci; + const struct hwmon_chip_info *chip_info; + const struct ec_sensor_info *si; + enum hwmon_sensor_types type; + unsigned int i; + + ec_data->board_sensors = get_board_sensors(dev); + if (!ec_data->board_sensors) { + return -ENODEV; + } + + ec_data->nr_sensors = board_sensors_count(ec_data->board_sensors); + ec_data->sensors = devm_kcalloc(dev, ec_data->nr_sensors, + sizeof(struct ec_sensor), GFP_KERNEL); + + setup_sensor_data(ec_data); + ec_data->registers = devm_kcalloc(dev, ec_data->nr_registers, + sizeof(u16), GFP_KERNEL); + ec_data->read_buffer = devm_kcalloc(dev, ec_data->nr_registers, + sizeof(u8), GFP_KERNEL); + + if (!ec_data->registers || !ec_data->read_buffer) { + return 
-ENOMEM; + } + + fill_ec_registers(ec_data); + + ec_data->aml_mutex = asus_hw_access_mutex(dev); + + for (i = 0; i < ec_data->nr_sensors; ++i) { + si = get_sensor_info(ec_data, i); + if (!nr_count[si->type]) + ++nr_types; + ++nr_count[si->type]; + } + + if (nr_count[hwmon_temp]) + nr_count[hwmon_chip]++, nr_types++; + + asus_ec_hwmon_chan = devm_kcalloc( + dev, nr_types, sizeof(*asus_ec_hwmon_chan), GFP_KERNEL); + if (!asus_ec_hwmon_chan) + return -ENOMEM; + + ptr_asus_ec_ci = devm_kcalloc(dev, nr_types + 1, + sizeof(*ptr_asus_ec_ci), GFP_KERNEL); + if (!ptr_asus_ec_ci) + return -ENOMEM; + + asus_ec_chip_info.info = ptr_asus_ec_ci; + chip_info = &asus_ec_chip_info; + + for (type = 0; type < hwmon_max; ++type) { + if (!nr_count[type]) + continue; + + asus_ec_hwmon_add_chan_info(asus_ec_hwmon_chan, dev, + nr_count[type], type, + hwmon_attributes[type]); + *ptr_asus_ec_ci++ = asus_ec_hwmon_chan++; + } + + dev_info(dev, "board has %d EC sensors that span %d registers", + ec_data->nr_sensors, ec_data->nr_registers); + + hwdev = devm_hwmon_device_register_with_info(dev, "asusec", + ec_data, chip_info, NULL); + + return PTR_ERR_OR_ZERO(hwdev); +} + +static int __init asus_ec_probe(struct platform_device *pdev) +{ + struct asus_ec_sensors *state; + int status = 0; + + state = devm_kzalloc(&pdev->dev, sizeof(struct ec_sensors_data), + GFP_KERNEL); + + if (!state) { + return -ENOMEM; + } + + dev_set_drvdata(&pdev->dev, state); + status = configure_sensor_setup(&pdev->dev); + return status; +} + +static const struct acpi_device_id acpi_ec_ids[] = { + /* Embedded Controller Device */ + { "PNP0C09", 0 }, + {} +}; + +static struct platform_driver asus_ec_sensors_platform_driver = { + .driver = { + .name = "asus-ec-sensors", + .acpi_match_table = acpi_ec_ids, + }, +}; + +MODULE_DEVICE_TABLE(dmi, asus_ec_dmi_table); +module_platform_driver_probe(asus_ec_sensors_platform_driver, asus_ec_probe); + +module_param_named(mutex_path, mutex_path_override, charp, 0); 
+MODULE_PARM_DESC(mutex_path, + "Override ACPI mutex path used to guard access to hardware"); + +MODULE_AUTHOR("Eugene Shalygin "); +MODULE_DESCRIPTION( + "HWMON driver for sensors accessible via ACPI EC in ASUS motherboards"); +MODULE_LICENSE("GPL"); -- GitLab From 0314c6ac9e98d7551554fe422128726cd2849a8e Mon Sep 17 00:00:00 2001 From: Eugene Shalygin Date: Mon, 24 Jan 2022 02:56:44 +0100 Subject: [PATCH 0740/1586] hwmon: (asus-ec-sensors) update documentation Signed-off-by: Eugene Shalygin Link: https://lore.kernel.org/r/20220124015658.687309-3-eugene.shalygin@gmail.com Tested-by: Oleksandr Natalenko Tested-by: Denis Pauk [groeck: update index.rst, do not drop asus_wmi_ec_sensors.rst] Signed-off-by: Guenter Roeck --- Documentation/hwmon/asus_ec_sensors.rst | 52 +++++++++++++++++++++++++ Documentation/hwmon/index.rst | 1 + 2 files changed, 53 insertions(+) create mode 100644 Documentation/hwmon/asus_ec_sensors.rst diff --git a/Documentation/hwmon/asus_ec_sensors.rst b/Documentation/hwmon/asus_ec_sensors.rst new file mode 100644 index 0000000000000..b12ac7ebeb1aa --- /dev/null +++ b/Documentation/hwmon/asus_ec_sensors.rst @@ -0,0 +1,52 @@ +.. SPDX-License-Identifier: GPL-2.0-or-later + +Kernel driver asus_ec_sensors +================================= + +Supported boards: + * PRIME X570-PRO, + * Pro WS X570-ACE, + * ROG CROSSHAIR VIII DARK HERO, + * ROG CROSSHAIR VIII FORMULA, + * ROG CROSSHAIR VIII HERO, + * ROG CROSSHAIR VIII IMPACT, + * ROG STRIX B550-E GAMING, + * ROG STRIX B550-I GAMING, + * ROG STRIX X570-E GAMING, + * ROG STRIX X570-F GAMING, + * ROG STRIX X570-I GAMING + +Authors: + - Eugene Shalygin + +Description: +------------ +ASUS mainboards publish hardware monitoring information via Super I/O +chip and the ACPI embedded controller (EC) registers. Some of the sensors +are only available via the EC. + +The driver is aware of and reads the following sensors: + +1. Chipset (PCH) temperature +2. CPU package temperature +3. Motherboard temperature +4. 
Readings from the T_Sensor header +5. VRM temperature +6. CPU_Opt fan RPM +7. VRM heatsink fan RPM +8. Chipset fan RPM +9. Readings from the "Water flow meter" header (RPM) +10. Readings from the "Water In" and "Water Out" temperature headers +11. CPU current + +Sensor values are read from EC registers, and to avoid race with the board +firmware the driver acquires ACPI mutex, the one used by the WMI when its +methods access the EC. + +Module Parameters +----------------- + * mutex_path: string + The driver holds path to the ACPI mutex for each board (actually, + the path is mostly identical for them). If ASUS changes this path + in a future BIOS update, this parameter can be used to override + the stored in the driver value until it gets updated. diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst index df20022c741f1..b69fdaf1af82e 100644 --- a/Documentation/hwmon/index.rst +++ b/Documentation/hwmon/index.rst @@ -43,6 +43,7 @@ Hardware Monitoring Kernel Drivers asb100 asc7621 aspeed-pwm-tacho + asus_ec_sensors asus_wmi_ec_sensors asus_wmi_sensors bcm54140 -- GitLab From 4408d3600f5af8abc3258900526ae8c16b106e81 Mon Sep 17 00:00:00 2001 From: Eugene Shalygin Date: Mon, 24 Jan 2022 02:56:45 +0100 Subject: [PATCH 0741/1586] hwmon: deprecate asis_wmi_ec_sensors driver Deprecate the asus_wmi_ec_sensors driver in favor of the asus_ec_sensors Signed-off-by: Eugene Shalygin Reviewed-by: Oleksandr Natalenko Link: https://lore.kernel.org/r/20220124015658.687309-4-eugene.shalygin@gmail.com Tested-by: Oleksandr Natalenko Tested-by: Denis Pauk Signed-off-by: Guenter Roeck --- drivers/hwmon/Kconfig | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index b75ec220da5ad..5f1506cca68f9 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -2253,13 +2253,16 @@ config SENSORS_ASUS_WMI config SENSORS_ASUS_WMI_EC tristate "ASUS WMI B550/X570" - depends on ACPI_WMI + depends on ACPI_WMI 
&& SENSORS_ASUS_EC=n help If you say yes here you get support for the ACPI embedded controller hardware monitoring interface found in B550/X570 ASUS motherboards. This driver will provide readings of fans, voltages and temperatures through the system firmware. + This driver is deprecated in favor of the ASUS EC Sensors driver + which provides fully compatible output. + This driver can also be built as a module. If so, the module will be called asus_wmi_sensors_ec. -- GitLab From da74944d3a469ffc0e8229520afbf41ad01219b6 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Wed, 26 Jan 2022 16:18:25 +0200 Subject: [PATCH 0742/1586] hwmon: (mlxreg-fan) Use pwm attribute for setting fan speed low limit Recently the 'cur_state' user space 'sysfs' interface has been deprecated. This interface is used in Nvidia systems for setting fan speed limit. Currently fan speed limit is set from the user space by setting 'sysfs' 'cur_state' attribute to 'max_state + n', where 'n' is required limit, for example: 15 for 50% speed limit, 20 for full fan speed enforcement. The purpose of this feature is to provide the ability to limit fan speed according to some system-wide considerations, like absence of some replaceable units (PSU or line cards), high system ambient temperature, unreliable transceivers temperature sensing or some other factors which indirectly impact system's airflow. The motivation is to support fan low limit feature through 'hwmon' interface. Use 'hwmon' 'pwm' attribute for setting low limit for fan speed in case 'thermal' subsystem is configured in kernel. In this case setting fan speed through 'hwmon' will never let the 'thermal' subsystem select a lower duty cycle than the duty cycle selected with the 'pwm' attribute. On the other hand, fan speed is to be updated in hardware through 'pwm' only in case the requested fan speed is above last speed set by 'thermal' subsystem, otherwise requested fan speed will be just stored with no PWM update. 
Signed-off-by: Vadim Pasternak Link: https://lore.kernel.org/r/20220126141825.13545-1-vadimp@nvidia.com Signed-off-by: Guenter Roeck --- drivers/hwmon/mlxreg-fan.c | 84 ++++++++++++-------------------------- 1 file changed, 27 insertions(+), 57 deletions(-) diff --git a/drivers/hwmon/mlxreg-fan.c b/drivers/hwmon/mlxreg-fan.c index 4a8becdb0d582..b48bd7c961d66 100644 --- a/drivers/hwmon/mlxreg-fan.c +++ b/drivers/hwmon/mlxreg-fan.c @@ -18,15 +18,6 @@ #define MLXREG_FAN_MAX_STATE 10 #define MLXREG_FAN_MIN_DUTY 51 /* 20% */ #define MLXREG_FAN_MAX_DUTY 255 /* 100% */ -/* - * Minimum and maximum FAN allowed speed in percent: from 20% to 100%. Values - * MLXREG_FAN_MAX_STATE + x, where x is between 2 and 10 are used for - * setting FAN speed dynamic minimum. For example, if value is set to 14 (40%) - * cooling levels vector will be set to 4, 4, 4, 4, 4, 5, 6, 7, 8, 9, 10 to - * introduce PWM speed in percent: 40, 40, 40, 40, 40, 50, 60. 70, 80, 90, 100. - */ -#define MLXREG_FAN_SPEED_MIN (MLXREG_FAN_MAX_STATE + 2) -#define MLXREG_FAN_SPEED_MAX (MLXREG_FAN_MAX_STATE * 2) #define MLXREG_FAN_SPEED_MIN_LEVEL 2 /* 20 percent */ #define MLXREG_FAN_TACHO_SAMPLES_PER_PULSE_DEF 44 #define MLXREG_FAN_TACHO_DIV_MIN 283 @@ -87,13 +78,16 @@ struct mlxreg_fan_tacho { * @connected: indicates if PWM is connected; * @reg: register offset; * @cooling: cooling device levels; + * @last_hwmon_state: last cooling state set by hwmon subsystem; + * @last_thermal_state: last cooling state set by thermal subsystem; * @cdev: cooling device; */ struct mlxreg_fan_pwm { struct mlxreg_fan *fan; bool connected; u32 reg; - u8 cooling_levels[MLXREG_FAN_MAX_STATE + 1]; + unsigned long last_hwmon_state; + unsigned long last_thermal_state; struct thermal_cooling_device *cdev; }; @@ -119,6 +113,9 @@ struct mlxreg_fan { int divider; }; +static int mlxreg_fan_set_cur_state(struct thermal_cooling_device *cdev, + unsigned long state); + static int mlxreg_fan_read(struct device *dev, enum hwmon_sensor_types type, 
u32 attr, int channel, long *val) @@ -213,6 +210,18 @@ mlxreg_fan_write(struct device *dev, enum hwmon_sensor_types type, u32 attr, val > MLXREG_FAN_MAX_DUTY) return -EINVAL; pwm = &fan->pwm[channel]; + /* If thermal is configured - handle PWM limit setting. */ + if (IS_REACHABLE(CONFIG_THERMAL)) { + pwm->last_hwmon_state = MLXREG_FAN_PWM_DUTY2STATE(val); + /* + * Update PWM only in case requested state is not less than the + * last thermal state. + */ + if (pwm->last_hwmon_state >= pwm->last_thermal_state) + return mlxreg_fan_set_cur_state(pwm->cdev, + pwm->last_hwmon_state); + return 0; + } return regmap_write(fan->regmap, pwm->reg, val); default: return -EOPNOTSUPP; @@ -338,58 +347,22 @@ static int mlxreg_fan_set_cur_state(struct thermal_cooling_device *cdev, { struct mlxreg_fan_pwm *pwm = cdev->devdata; struct mlxreg_fan *fan = pwm->fan; - unsigned long cur_state; - int i, config = 0; - u32 regval; int err; - /* - * Verify if this request is for changing allowed FAN dynamical - * minimum. If it is - update cooling levels accordingly and update - * state, if current state is below the newly requested minimum state. - * For example, if current state is 5, and minimal state is to be - * changed from 4 to 6, fan->cooling_levels[0 to 5] will be changed all - * from 4 to 6. And state 5 (fan->cooling_levels[4]) should be - * overwritten. - */ - if (state >= MLXREG_FAN_SPEED_MIN && state <= MLXREG_FAN_SPEED_MAX) { - /* - * This is configuration change, which is only supported through sysfs. - * For configuration non-zero value is to be returned to avoid thermal - * statistics update. 
- */ - config = 1; - state -= MLXREG_FAN_MAX_STATE; - for (i = 0; i < state; i++) - pwm->cooling_levels[i] = state; - for (i = state; i <= MLXREG_FAN_MAX_STATE; i++) - pwm->cooling_levels[i] = i; - - err = regmap_read(fan->regmap, pwm->reg, &regval); - if (err) { - dev_err(fan->dev, "Failed to query PWM duty\n"); - return err; - } - - cur_state = MLXREG_FAN_PWM_DUTY2STATE(regval); - if (state < cur_state) - return config; - - state = cur_state; - } - if (state > MLXREG_FAN_MAX_STATE) return -EINVAL; - /* Normalize the state to the valid speed range. */ - state = pwm->cooling_levels[state]; + /* Save thermal state. */ + pwm->last_thermal_state = state; + + state = max_t(unsigned long, state, pwm->last_hwmon_state); err = regmap_write(fan->regmap, pwm->reg, MLXREG_FAN_PWM_STATE2DUTY(state)); if (err) { dev_err(fan->dev, "Failed to write PWM duty\n"); return err; } - return config; + return 0; } static const struct thermal_cooling_device_ops mlxreg_fan_cooling_ops = { @@ -564,7 +537,7 @@ static int mlxreg_fan_config(struct mlxreg_fan *fan, static int mlxreg_fan_cooling_config(struct device *dev, struct mlxreg_fan *fan) { - int i, j; + int i; for (i = 0; i < MLXREG_FAN_MAX_PWM; i++) { struct mlxreg_fan_pwm *pwm = &fan->pwm[i]; @@ -579,11 +552,8 @@ static int mlxreg_fan_cooling_config(struct device *dev, struct mlxreg_fan *fan) return PTR_ERR(pwm->cdev); } - /* Init cooling levels per PWM state. */ - for (j = 0; j < MLXREG_FAN_SPEED_MIN_LEVEL; j++) - pwm->cooling_levels[j] = MLXREG_FAN_SPEED_MIN_LEVEL; - for (j = MLXREG_FAN_SPEED_MIN_LEVEL; j <= MLXREG_FAN_MAX_STATE; j++) - pwm->cooling_levels[j] = j; + /* Set minimal PWM speed. */ + pwm->last_hwmon_state = MLXREG_FAN_PWM_DUTY2STATE(MLXREG_FAN_MIN_DUTY); } return 0; -- GitLab From 06f7d6e75d1232fefc2c3c8567159963c38ec628 Mon Sep 17 00:00:00 2001 From: Marcello Sylvester Bauer Date: Thu, 27 Jan 2022 17:17:27 +0100 Subject: [PATCH 0743/1586] hwmon: (max6639) Update Datasheet URL The old Datasheet does not exist anymore. 
Signed-off-by: Marcello Sylvester Bauer Link: https://lore.kernel.org/r/76025f40d2684dc0d3ec02c8899b726b07a0e7da.1643299570.git.sylv@sylv.io Signed-off-by: Guenter Roeck --- Documentation/hwmon/max6639.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/hwmon/max6639.rst b/Documentation/hwmon/max6639.rst index 3da54225f83cd..c85d285a3489d 100644 --- a/Documentation/hwmon/max6639.rst +++ b/Documentation/hwmon/max6639.rst @@ -9,7 +9,7 @@ Supported chips: Addresses scanned: I2C 0x2c, 0x2e, 0x2f - Datasheet: http://pdfserv.maxim-ic.com/en/ds/MAX6639.pdf + Datasheet: https://datasheets.maximintegrated.com/en/ds/MAX6639-MAX6639F.pdf Authors: - He Changqing -- GitLab From 4e2271ea275193a24ea1c905a38fa000b0f8b822 Mon Sep 17 00:00:00 2001 From: Marcello Sylvester Bauer Date: Thu, 27 Jan 2022 17:17:28 +0100 Subject: [PATCH 0744/1586] hwmon: (max6639) Add regulator support Add regulator support for boards where the fan-supply has to be powered up before it can be used. 
Signed-off-by: Patrick Rudolph Signed-off-by: Marcello Sylvester Bauer Link: https://lore.kernel.org/r/2cb9ed600fb43cdc604799746fbde2e2942cdca6.1643299570.git.sylv@sylv.io Signed-off-by: Guenter Roeck --- drivers/hwmon/max6639.c | 62 +++++++++++++++++++++++++++++++++++------ 1 file changed, 54 insertions(+), 8 deletions(-) diff --git a/drivers/hwmon/max6639.c b/drivers/hwmon/max6639.c index ccc0f047bd446..14bb7726f8d7e 100644 --- a/drivers/hwmon/max6639.c +++ b/drivers/hwmon/max6639.c @@ -87,6 +87,9 @@ struct max6639_data { /* Register values initialized only once */ u8 ppr; /* Pulses per rotation 0..3 for 1..4 ppr */ u8 rpm_range; /* Index in above rpm_ranges table */ + + /* Optional regulator for FAN supply */ + struct regulator *reg; }; static struct max6639_data *max6639_update_device(struct device *dev) @@ -516,6 +519,11 @@ static int max6639_detect(struct i2c_client *client, return 0; } +static void max6639_regulator_disable(void *data) +{ + regulator_disable(data); +} + static int max6639_probe(struct i2c_client *client) { struct device *dev = &client->dev; @@ -528,6 +536,28 @@ static int max6639_probe(struct i2c_client *client) return -ENOMEM; data->client = client; + + data->reg = devm_regulator_get_optional(dev, "fan"); + if (IS_ERR(data->reg)) { + if (PTR_ERR(data->reg) != -ENODEV) + return PTR_ERR(data->reg); + + data->reg = NULL; + } else { + /* Spin up fans */ + err = regulator_enable(data->reg); + if (err) { + dev_err(dev, "Failed to enable fan supply: %d\n", err); + return err; + } + err = devm_add_action_or_reset(dev, max6639_regulator_disable, + data->reg); + if (err) { + dev_err(dev, "Failed to register action: %d\n", err); + return err; + } + } + mutex_init(&data->update_lock); /* Initialize the max6639 chip */ @@ -545,23 +575,39 @@ static int max6639_probe(struct i2c_client *client) static int max6639_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); - int data = i2c_smbus_read_byte_data(client, MAX6639_REG_GCONFIG); 
- if (data < 0) - return data; + struct max6639_data *data = dev_get_drvdata(dev); + int ret = i2c_smbus_read_byte_data(client, MAX6639_REG_GCONFIG); + + if (ret < 0) + return ret; + + if (data->reg) + regulator_disable(data->reg); return i2c_smbus_write_byte_data(client, - MAX6639_REG_GCONFIG, data | MAX6639_GCONFIG_STANDBY); + MAX6639_REG_GCONFIG, ret | MAX6639_GCONFIG_STANDBY); } static int max6639_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); - int data = i2c_smbus_read_byte_data(client, MAX6639_REG_GCONFIG); - if (data < 0) - return data; + struct max6639_data *data = dev_get_drvdata(dev); + int ret; + + if (data->reg) { + ret = regulator_enable(data->reg); + if (ret) { + dev_err(dev, "Failed to enable fan supply: %d\n", ret); + return ret; + } + } + + ret = i2c_smbus_read_byte_data(client, MAX6639_REG_GCONFIG); + if (ret < 0) + return ret; return i2c_smbus_write_byte_data(client, - MAX6639_REG_GCONFIG, data & ~MAX6639_GCONFIG_STANDBY); + MAX6639_REG_GCONFIG, ret & ~MAX6639_GCONFIG_STANDBY); } #endif /* CONFIG_PM_SLEEP */ -- GitLab From 2f66cb5bf35265a8c9413a019166a0c29823ce57 Mon Sep 17 00:00:00 2001 From: Eugene Shalygin Date: Thu, 3 Feb 2022 21:30:52 +0100 Subject: [PATCH 0745/1586] hwmon: (asus-ec-sensors) Add Crosshair VIII Hero WiFi The Wi-Fi variant of Crosshair VIII Hero provides the same sensors, which was tested by a Libre Hardware Monitor user [1]. 
[1] https://github.com/LibreHardwareMonitor/LibreHardwareMonitor/pull/453#issuecomment-1028398487 Signed-off-by: Eugene Shalygin Link: https://lore.kernel.org/r/20220203203052.441712-1-eugene.shalygin@gmail.com Signed-off-by: Guenter Roeck --- Documentation/hwmon/asus_ec_sensors.rst | 1 + drivers/hwmon/asus-ec-sensors.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/Documentation/hwmon/asus_ec_sensors.rst b/Documentation/hwmon/asus_ec_sensors.rst index b12ac7ebeb1aa..22de1b037cfba 100644 --- a/Documentation/hwmon/asus_ec_sensors.rst +++ b/Documentation/hwmon/asus_ec_sensors.rst @@ -7,6 +7,7 @@ Supported boards: * PRIME X570-PRO, * Pro WS X570-ACE, * ROG CROSSHAIR VIII DARK HERO, + * ROG CROSSHAIR VIII HERO (WI-FI) * ROG CROSSHAIR VIII FORMULA, * ROG CROSSHAIR VIII HERO, * ROG CROSSHAIR VIII IMPACT, diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c index 7285334c7d801..05244209c0c6e 100644 --- a/drivers/hwmon/asus-ec-sensors.c +++ b/drivers/hwmon/asus-ec-sensors.c @@ -188,6 +188,12 @@ static const struct dmi_system_id asus_ec_dmi_table[] __initconst = { SENSOR_TEMP_VRM | SENSOR_SET_TEMP_WATER | SENSOR_FAN_CPU_OPT | SENSOR_FAN_CHIPSET | SENSOR_FAN_WATER_FLOW | SENSOR_CURR_CPU), + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, + "ROG CROSSHAIR VIII HERO (WI-FI)", + SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR | + SENSOR_TEMP_VRM | SENSOR_SET_TEMP_WATER | + SENSOR_FAN_CPU_OPT | SENSOR_FAN_CHIPSET | + SENSOR_FAN_WATER_FLOW | SENSOR_CURR_CPU), DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG CROSSHAIR VIII IMPACT", SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR | -- GitLab From 88846ff7422089f576e29b68153034babbc68ce6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 5 Feb 2022 12:20:15 +0300 Subject: [PATCH 0746/1586] hwmon: (asus-ec-sensors) fix a typo in asus_ec_probe() There is no such struct as "asus_ec_sensors", it was supposed to be "ec_sensors_data". This typo does not affect either build or runtime. 
Fixes: c4b1687d6897 ("hwmon: (asus-ec-sensors) add driver for ASUS EC") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20220205092015.GA612@kili Signed-off-by: Guenter Roeck --- drivers/hwmon/asus-ec-sensors.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c index 05244209c0c6e..7330cce46b14a 100644 --- a/drivers/hwmon/asus-ec-sensors.c +++ b/drivers/hwmon/asus-ec-sensors.c @@ -659,7 +659,7 @@ static int __init configure_sensor_setup(struct device *dev) static int __init asus_ec_probe(struct platform_device *pdev) { - struct asus_ec_sensors *state; + struct ec_sensors_data *state; int status = 0; state = devm_kzalloc(&pdev->dev, sizeof(struct ec_sensors_data), -- GitLab From 339f8a998f58e8693f73429e7597f2ffc9b93ec5 Mon Sep 17 00:00:00 2001 From: Eugene Shalygin Date: Fri, 4 Feb 2022 17:30:45 +0100 Subject: [PATCH 0747/1586] hwmon: (asus-ec-sensors) read sensors as signed ints Temperature sensor readings are signed, which is hinted by their blank value (0xd8, 216 as unsigned and -40 as signed). T_Sensor, Crosshair VIII Hero, and a freezer were used to confirm that. Here we read fan sensors as signed too, because with their typical values and 2-byte width, I can't tell a difference between signed and unsigned, as I don't have a high speed chipset fan. 
Signed-off-by: Eugene Shalygin Link: https://lore.kernel.org/r/20220204163045.576903-1-eugene.shalygin@gmail.com Signed-off-by: Guenter Roeck --- drivers/hwmon/asus-ec-sensors.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c index 7330cce46b14a..dd7b207d062fd 100644 --- a/drivers/hwmon/asus-ec-sensors.c +++ b/drivers/hwmon/asus-ec-sensors.c @@ -221,7 +221,7 @@ static const struct dmi_system_id asus_ec_dmi_table[] __initconst = { struct ec_sensor { unsigned int info_index; - u32 cached_value; + s32 cached_value; }; struct ec_sensors_data { @@ -408,15 +408,15 @@ static int asus_ec_block_read(const struct device *dev, return status; } -static inline u32 get_sensor_value(const struct ec_sensor_info *si, u8 *data) +static inline s32 get_sensor_value(const struct ec_sensor_info *si, u8 *data) { switch (si->addr.components.size) { case 1: - return *data; + return (s8)*data; case 2: - return get_unaligned_be16(data); + return (s16)get_unaligned_be16(data); case 4: - return get_unaligned_be32(data); + return (s32)get_unaligned_be32(data); default: return 0; } @@ -462,7 +462,7 @@ cleanup: return status; } -static int scale_sensor_value(u32 value, int data_type) +static long scale_sensor_value(s32 value, int data_type) { switch (data_type) { case hwmon_curr: @@ -476,7 +476,7 @@ static int scale_sensor_value(u32 value, int data_type) static int get_cached_value_or_update(const struct device *dev, int sensor_index, - struct ec_sensors_data *state, u32 *value) + struct ec_sensors_data *state, s32 *value) { if (time_after(jiffies, state->last_updated + HZ)) { if (update_ec_sensors(dev, state)) { @@ -499,7 +499,7 @@ static int asus_ec_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, long *val) { int ret; - u32 value = 0; + s32 value = 0; struct ec_sensors_data *state = dev_get_drvdata(dev); int sidx = find_ec_sensor_index(state, type, channel); 
-- GitLab From 7979a30ddc560b372c0b9549f0587f56365cf6bf Mon Sep 17 00:00:00 2001 From: Denis Pauk Date: Mon, 7 Feb 2022 23:48:15 +0200 Subject: [PATCH 0748/1586] hwmon: (nct6775) add ASUS ROG STRIX Z390/Z490/X570-* / PRIME X570-P Boards such as * PRIME X570-P, * ROG STRIX B550-F GAMING WIFI II, * ROG STRIX B550-XE GAMING (WI-FI), * ROG STRIX X570-E GAMING, * ROG STRIX Z390-F GAMING, * ROG STRIX Z390-H GAMING, * ROG STRIX Z390-I GAMING, * ROG STRIX Z490-A GAMING, * ROG STRIX Z490-E GAMING, * ROG STRIX Z490-F GAMING, * ROG STRIX Z490-G GAMING, * ROG STRIX Z490-G GAMING (WI-FI), * ROG STRIX Z490-H GAMING have got a nct6775 chip, but by default there's no use of it because of resource conflict with WMI method. This commit adds such boards to the WMI monitoring list. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=204807 Signed-off-by: Denis Pauk Tested-by: Per Melin Tested-by: Jaap de Haan Link: https://lore.kernel.org/r/20220207214815.10995-1-pauk.denis@gmail.com Signed-off-by: Guenter Roeck --- drivers/hwmon/nct6775.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index fbf6266c7ba7b..2b91f7e05126e 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -5109,6 +5109,7 @@ static const char * const asus_wmi_boards[] = { "PRIME B550-PLUS", "PRIME B550M-A", "PRIME B550M-A (WI-FI)", + "PRIME X570-P", "PRIME X570-PRO", "ROG CROSSHAIR VIII DARK HERO", "ROG CROSSHAIR VIII FORMULA", @@ -5118,10 +5119,22 @@ static const char * const asus_wmi_boards[] = { "ROG STRIX B550-E GAMING", "ROG STRIX B550-F GAMING", "ROG STRIX B550-F GAMING (WI-FI)", + "ROG STRIX B550-F GAMING WIFI II", "ROG STRIX B550-I GAMING", + "ROG STRIX B550-XE GAMING (WI-FI)", + "ROG STRIX X570-E GAMING", "ROG STRIX X570-F GAMING", "ROG STRIX X570-I GAMING", "ROG STRIX Z390-E GAMING", + "ROG STRIX Z390-F GAMING", + "ROG STRIX Z390-H GAMING", + "ROG STRIX Z390-I GAMING", + "ROG STRIX Z490-A GAMING", + "ROG STRIX Z490-E 
GAMING", + "ROG STRIX Z490-F GAMING", + "ROG STRIX Z490-G GAMING", + "ROG STRIX Z490-G GAMING (WI-FI)", + "ROG STRIX Z490-H GAMING", "ROG STRIX Z490-I GAMING", "TUF GAMING B550M-PLUS", "TUF GAMING B550M-PLUS (WI-FI)", -- GitLab From f53207017fd99aaeecef6e61b4a3d8e868f53756 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 23 Dec 2021 10:32:05 -0800 Subject: [PATCH 0749/1586] hwmon: (adt7x10) Convert to use regmap Using regmap lets us use the regmap subsystem for SPI vs. I2C register accesses. It lets us hide access differences in backend code and lets the common code just access registers without knowing their size. We can also use regmap for register caching. Tested-by: Cosmin Tanislav Reviewed-by: Cosmin Tanislav Signed-off-by: Guenter Roeck --- drivers/hwmon/Kconfig | 1 + drivers/hwmon/adt7310.c | 88 +++++++++++++--- drivers/hwmon/adt7410.c | 77 ++++++++++---- drivers/hwmon/adt7x10.c | 225 +++++++++++++++------------------------- drivers/hwmon/adt7x10.h | 10 +- 5 files changed, 213 insertions(+), 188 deletions(-) diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 5f1506cca68f9..387253ae6c149 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -174,6 +174,7 @@ config SENSORS_ADM9240 config SENSORS_ADT7X10 tristate + select REGMAP help This module contains common code shared by the ADT7310/ADT7320 and ADT7410/ADT7420 temperature monitoring chip drivers. 
diff --git a/drivers/hwmon/adt7310.c b/drivers/hwmon/adt7310.c index c40cac16af683..a83092470bce4 100644 --- a/drivers/hwmon/adt7310.c +++ b/drivers/hwmon/adt7310.c @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -38,16 +39,13 @@ static const u8 adt7310_reg_table[] = { #define AD7310_COMMAND(reg) (adt7310_reg_table[(reg)] << ADT7310_CMD_REG_OFFSET) -static int adt7310_spi_read_word(struct device *dev, u8 reg) +static int adt7310_spi_read_word(struct spi_device *spi, u8 reg) { - struct spi_device *spi = to_spi_device(dev); - return spi_w8r16be(spi, AD7310_COMMAND(reg) | ADT7310_CMD_READ); } -static int adt7310_spi_write_word(struct device *dev, u8 reg, u16 data) +static int adt7310_spi_write_word(struct spi_device *spi, u8 reg, u16 data) { - struct spi_device *spi = to_spi_device(dev); u8 buf[3]; buf[0] = AD7310_COMMAND(reg); @@ -56,17 +54,13 @@ static int adt7310_spi_write_word(struct device *dev, u8 reg, u16 data) return spi_write(spi, buf, sizeof(buf)); } -static int adt7310_spi_read_byte(struct device *dev, u8 reg) +static int adt7310_spi_read_byte(struct spi_device *spi, u8 reg) { - struct spi_device *spi = to_spi_device(dev); - return spi_w8r8(spi, AD7310_COMMAND(reg) | ADT7310_CMD_READ); } -static int adt7310_spi_write_byte(struct device *dev, u8 reg, - u8 data) +static int adt7310_spi_write_byte(struct spi_device *spi, u8 reg, u8 data) { - struct spi_device *spi = to_spi_device(dev); u8 buf[2]; buf[0] = AD7310_COMMAND(reg); @@ -75,17 +69,77 @@ static int adt7310_spi_write_byte(struct device *dev, u8 reg, return spi_write(spi, buf, sizeof(buf)); } -static const struct adt7x10_ops adt7310_spi_ops = { - .read_word = adt7310_spi_read_word, - .write_word = adt7310_spi_write_word, - .read_byte = adt7310_spi_read_byte, - .write_byte = adt7310_spi_write_byte, +static bool adt7310_regmap_is_volatile(struct device *dev, unsigned int reg) +{ + switch (reg) { + case ADT7X10_TEMPERATURE: + case ADT7X10_STATUS: + return true; + default: + return false; + 
} +} + +static int adt7310_reg_read(void *context, unsigned int reg, unsigned int *val) +{ + struct spi_device *spi = context; + int regval; + + switch (reg) { + case ADT7X10_TEMPERATURE: + case ADT7X10_T_ALARM_HIGH: + case ADT7X10_T_ALARM_LOW: + case ADT7X10_T_CRIT: + regval = adt7310_spi_read_word(spi, reg); + break; + default: + regval = adt7310_spi_read_byte(spi, reg); + break; + } + if (regval < 0) + return regval; + *val = regval; + return 0; +} + +static int adt7310_reg_write(void *context, unsigned int reg, unsigned int val) +{ + struct spi_device *spi = context; + int ret; + + switch (reg) { + case ADT7X10_TEMPERATURE: + case ADT7X10_T_ALARM_HIGH: + case ADT7X10_T_ALARM_LOW: + case ADT7X10_T_CRIT: + ret = adt7310_spi_write_word(spi, reg, val); + break; + default: + ret = adt7310_spi_write_byte(spi, reg, val); + break; + } + return ret; +} + +static const struct regmap_config adt7310_regmap_config = { + .reg_bits = 8, + .val_bits = 16, + .cache_type = REGCACHE_RBTREE, + .volatile_reg = adt7310_regmap_is_volatile, + .reg_read = adt7310_reg_read, + .reg_write = adt7310_reg_write, }; static int adt7310_spi_probe(struct spi_device *spi) { + struct regmap *regmap; + + regmap = devm_regmap_init(&spi->dev, NULL, spi, &adt7310_regmap_config); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); + return adt7x10_probe(&spi->dev, spi_get_device_id(spi)->name, spi->irq, - &adt7310_spi_ops); + regmap); } static int adt7310_spi_remove(struct spi_device *spi) diff --git a/drivers/hwmon/adt7410.c b/drivers/hwmon/adt7410.c index 973db057427be..b1f4497dca051 100644 --- a/drivers/hwmon/adt7410.c +++ b/drivers/hwmon/adt7410.c @@ -9,43 +9,82 @@ #include #include #include +#include #include "adt7x10.h" -static int adt7410_i2c_read_word(struct device *dev, u8 reg) +static bool adt7410_regmap_is_volatile(struct device *dev, unsigned int reg) { - return i2c_smbus_read_word_swapped(to_i2c_client(dev), reg); + switch (reg) { + case ADT7X10_TEMPERATURE: + case ADT7X10_STATUS: + return 
true; + default: + return false; + } } -static int adt7410_i2c_write_word(struct device *dev, u8 reg, u16 data) +static int adt7410_reg_read(void *context, unsigned int reg, unsigned int *val) { - return i2c_smbus_write_word_swapped(to_i2c_client(dev), reg, data); -} + struct i2c_client *client = context; + int regval; -static int adt7410_i2c_read_byte(struct device *dev, u8 reg) -{ - return i2c_smbus_read_byte_data(to_i2c_client(dev), reg); + switch (reg) { + case ADT7X10_TEMPERATURE: + case ADT7X10_T_ALARM_HIGH: + case ADT7X10_T_ALARM_LOW: + case ADT7X10_T_CRIT: + regval = i2c_smbus_read_word_swapped(client, reg); + break; + default: + regval = i2c_smbus_read_byte_data(client, reg); + break; + } + if (regval < 0) + return regval; + *val = regval; + return 0; } -static int adt7410_i2c_write_byte(struct device *dev, u8 reg, u8 data) +static int adt7410_reg_write(void *context, unsigned int reg, unsigned int val) { - return i2c_smbus_write_byte_data(to_i2c_client(dev), reg, data); + struct i2c_client *client = context; + int ret; + + switch (reg) { + case ADT7X10_TEMPERATURE: + case ADT7X10_T_ALARM_HIGH: + case ADT7X10_T_ALARM_LOW: + case ADT7X10_T_CRIT: + ret = i2c_smbus_write_word_swapped(client, reg, val); + break; + default: + ret = i2c_smbus_write_byte_data(client, reg, val); + break; + } + return ret; } -static const struct adt7x10_ops adt7410_i2c_ops = { - .read_word = adt7410_i2c_read_word, - .write_word = adt7410_i2c_write_word, - .read_byte = adt7410_i2c_read_byte, - .write_byte = adt7410_i2c_write_byte, +static const struct regmap_config adt7410_regmap_config = { + .reg_bits = 8, + .val_bits = 16, + .max_register = ADT7X10_ID, + .cache_type = REGCACHE_RBTREE, + .volatile_reg = adt7410_regmap_is_volatile, + .reg_read = adt7410_reg_read, + .reg_write = adt7410_reg_write, }; static int adt7410_i2c_probe(struct i2c_client *client) { - if (!i2c_check_functionality(client->adapter, - I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA)) - return -ENODEV; + 
struct regmap *regmap; + + regmap = devm_regmap_init(&client->dev, NULL, client, + &adt7410_regmap_config); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); - return adt7x10_probe(&client->dev, NULL, client->irq, &adt7410_i2c_ops); + return adt7x10_probe(&client->dev, NULL, client->irq, regmap); } static int adt7410_i2c_remove(struct i2c_client *client) diff --git a/drivers/hwmon/adt7x10.c b/drivers/hwmon/adt7x10.c index e9d33aa78a193..05dd48b707b44 100644 --- a/drivers/hwmon/adt7x10.c +++ b/drivers/hwmon/adt7x10.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "adt7x10.h" @@ -53,46 +54,15 @@ /* Each client has this additional data */ struct adt7x10_data { - const struct adt7x10_ops *ops; + struct regmap *regmap; const char *name; struct device *hwmon_dev; struct mutex update_lock; u8 config; u8 oldconfig; - bool valid; /* true if registers valid */ - unsigned long last_updated; /* In jiffies */ - s16 temp[4]; /* Register values, - 0 = input - 1 = high - 2 = low - 3 = critical */ - u8 hyst; /* hysteresis offset */ + bool valid; /* true if temperature valid */ }; -static int adt7x10_read_byte(struct device *dev, u8 reg) -{ - struct adt7x10_data *d = dev_get_drvdata(dev); - return d->ops->read_byte(dev, reg); -} - -static int adt7x10_write_byte(struct device *dev, u8 reg, u8 data) -{ - struct adt7x10_data *d = dev_get_drvdata(dev); - return d->ops->write_byte(dev, reg, data); -} - -static int adt7x10_read_word(struct device *dev, u8 reg) -{ - struct adt7x10_data *d = dev_get_drvdata(dev); - return d->ops->read_word(dev, reg); -} - -static int adt7x10_write_word(struct device *dev, u8 reg, u16 data) -{ - struct adt7x10_data *d = dev_get_drvdata(dev); - return d->ops->write_word(dev, reg, data); -} - static const u8 ADT7X10_REG_TEMP[4] = { ADT7X10_TEMPERATURE, /* input */ ADT7X10_T_ALARM_HIGH, /* high */ @@ -103,10 +73,12 @@ static const u8 ADT7X10_REG_TEMP[4] = { static irqreturn_t adt7x10_irq_handler(int irq, void *private) { struct device *dev = 
private; - int status; + struct adt7x10_data *d = dev_get_drvdata(dev); + unsigned int status; + int ret; - status = adt7x10_read_byte(dev, ADT7X10_STATUS); - if (status < 0) + ret = regmap_read(d->regmap, ADT7X10_STATUS, &status); + if (ret < 0) return IRQ_HANDLED; if (status & ADT7X10_STAT_T_HIGH) @@ -119,14 +91,15 @@ static irqreturn_t adt7x10_irq_handler(int irq, void *private) return IRQ_HANDLED; } -static int adt7x10_temp_ready(struct device *dev) +static int adt7x10_temp_ready(struct regmap *regmap) { - int i, status; + unsigned int status; + int i, ret; for (i = 0; i < 6; i++) { - status = adt7x10_read_byte(dev, ADT7X10_STATUS); - if (status < 0) - return status; + ret = regmap_read(regmap, ADT7X10_STATUS, &status); + if (ret < 0) + return ret; if (!(status & ADT7X10_STAT_NOT_RDY)) return 0; msleep(60); @@ -134,71 +107,10 @@ static int adt7x10_temp_ready(struct device *dev) return -ETIMEDOUT; } -static int adt7x10_update_temp(struct device *dev) -{ - struct adt7x10_data *data = dev_get_drvdata(dev); - int ret = 0; - - mutex_lock(&data->update_lock); - - if (time_after(jiffies, data->last_updated + HZ + HZ / 2) - || !data->valid) { - int temp; - - dev_dbg(dev, "Starting update\n"); - - ret = adt7x10_temp_ready(dev); /* check for new value */ - if (ret) - goto abort; - - temp = adt7x10_read_word(dev, ADT7X10_REG_TEMP[0]); - if (temp < 0) { - ret = temp; - dev_dbg(dev, "Failed to read value: reg %d, error %d\n", - ADT7X10_REG_TEMP[0], ret); - goto abort; - } - data->temp[0] = temp; - data->last_updated = jiffies; - data->valid = true; - } - -abort: - mutex_unlock(&data->update_lock); - return ret; -} - -static int adt7x10_fill_cache(struct device *dev) -{ - struct adt7x10_data *data = dev_get_drvdata(dev); - int ret; - int i; - - for (i = 1; i < ARRAY_SIZE(data->temp); i++) { - ret = adt7x10_read_word(dev, ADT7X10_REG_TEMP[i]); - if (ret < 0) { - dev_dbg(dev, "Failed to read value: reg %d, error %d\n", - ADT7X10_REG_TEMP[i], ret); - return ret; - } - 
data->temp[i] = ret; - } - - ret = adt7x10_read_byte(dev, ADT7X10_T_HYST); - if (ret < 0) { - dev_dbg(dev, "Failed to read value: reg %d, error %d\n", - ADT7X10_T_HYST, ret); - return ret; - } - data->hyst = ret; - - return 0; -} - static s16 ADT7X10_TEMP_TO_REG(long temp) { return DIV_ROUND_CLOSEST(clamp_val(temp, ADT7X10_TEMP_MIN, - ADT7X10_TEMP_MAX) * 128, 1000); + ADT7X10_TEMP_MAX) * 128, 1000); } static int ADT7X10_REG_TO_TEMP(struct adt7x10_data *data, s16 reg) @@ -222,18 +134,26 @@ static ssize_t adt7x10_temp_show(struct device *dev, { struct sensor_device_attribute *attr = to_sensor_dev_attr(da); struct adt7x10_data *data = dev_get_drvdata(dev); + unsigned int val; + int ret; - - if (attr->index == 0) { - int ret; - - ret = adt7x10_update_temp(dev); - if (ret) + mutex_lock(&data->update_lock); + if (attr->index == 0 && !data->valid) { + /* wait for valid temperature */ + ret = adt7x10_temp_ready(data->regmap); + if (ret) { + mutex_unlock(&data->update_lock); return ret; + } + data->valid = true; } + mutex_unlock(&data->update_lock); - return sprintf(buf, "%d\n", ADT7X10_REG_TO_TEMP(data, - data->temp[attr->index])); + ret = regmap_read(data->regmap, ADT7X10_REG_TEMP[attr->index], &val); + if (ret) + return ret; + + return sprintf(buf, "%d\n", ADT7X10_REG_TO_TEMP(data, val)); } static ssize_t adt7x10_temp_store(struct device *dev, @@ -251,12 +171,10 @@ static ssize_t adt7x10_temp_store(struct device *dev, return ret; mutex_lock(&data->update_lock); - data->temp[nr] = ADT7X10_TEMP_TO_REG(temp); - ret = adt7x10_write_word(dev, ADT7X10_REG_TEMP[nr], data->temp[nr]); - if (ret) - count = ret; + ret = regmap_write(data->regmap, ADT7X10_REG_TEMP[nr], + ADT7X10_TEMP_TO_REG(temp)); mutex_unlock(&data->update_lock); - return count; + return ret ? 
: count; } static ssize_t adt7x10_t_hyst_show(struct device *dev, @@ -265,9 +183,21 @@ static ssize_t adt7x10_t_hyst_show(struct device *dev, struct sensor_device_attribute *attr = to_sensor_dev_attr(da); struct adt7x10_data *data = dev_get_drvdata(dev); int nr = attr->index; - int hyst; + int hyst, temp, ret; + + mutex_lock(&data->update_lock); + ret = regmap_read(data->regmap, ADT7X10_T_HYST, &hyst); + if (ret) { + mutex_unlock(&data->update_lock); + return ret; + } + + ret = regmap_read(data->regmap, ADT7X10_REG_TEMP[nr], &temp); + mutex_unlock(&data->update_lock); + if (ret) + return ret; - hyst = (data->hyst & ADT7X10_T_HYST_MASK) * 1000; + hyst = (hyst & ADT7X10_T_HYST_MASK) * 1000; /* * hysteresis is stored as a 4 bit offset in the device, convert it @@ -275,8 +205,8 @@ static ssize_t adt7x10_t_hyst_show(struct device *dev, */ if (nr == 2) /* min has positive offset, others have negative */ hyst = -hyst; - return sprintf(buf, "%d\n", - ADT7X10_REG_TO_TEMP(data, data->temp[nr]) - hyst); + + return sprintf(buf, "%d\n", ADT7X10_REG_TO_TEMP(data, temp) - hyst); } static ssize_t adt7x10_t_hyst_store(struct device *dev, @@ -284,35 +214,45 @@ static ssize_t adt7x10_t_hyst_store(struct device *dev, const char *buf, size_t count) { struct adt7x10_data *data = dev_get_drvdata(dev); + unsigned int regval; int limit, ret; long hyst; ret = kstrtol(buf, 10, &hyst); if (ret) return ret; + + mutex_lock(&data->update_lock); + /* convert absolute hysteresis value to a 4 bit delta value */ - limit = ADT7X10_REG_TO_TEMP(data, data->temp[1]); - hyst = clamp_val(hyst, ADT7X10_TEMP_MIN, ADT7X10_TEMP_MAX); - data->hyst = clamp_val(DIV_ROUND_CLOSEST(limit - hyst, 1000), - 0, ADT7X10_T_HYST_MASK); - ret = adt7x10_write_byte(dev, ADT7X10_T_HYST, data->hyst); - if (ret) - return ret; + ret = regmap_read(data->regmap, ADT7X10_T_ALARM_HIGH, &regval); + if (ret < 0) + goto abort; - return count; + limit = ADT7X10_REG_TO_TEMP(data, regval); + + hyst = clamp_val(hyst, ADT7X10_TEMP_MIN, 
ADT7X10_TEMP_MAX); + regval = clamp_val(DIV_ROUND_CLOSEST(limit - hyst, 1000), 0, + ADT7X10_T_HYST_MASK); + ret = regmap_write(data->regmap, ADT7X10_T_HYST, regval); +abort: + mutex_unlock(&data->update_lock); + return ret ? : count; } static ssize_t adt7x10_alarm_show(struct device *dev, struct device_attribute *da, char *buf) { struct sensor_device_attribute *attr = to_sensor_dev_attr(da); + struct adt7x10_data *data = dev_get_drvdata(dev); + unsigned int status; int ret; - ret = adt7x10_read_byte(dev, ADT7X10_STATUS); + ret = regmap_read(data->regmap, ADT7X10_STATUS, &status); if (ret < 0) return ret; - return sprintf(buf, "%d\n", !!(ret & attr->index)); + return sprintf(buf, "%d\n", !!(status & attr->index)); } static ssize_t name_show(struct device *dev, struct device_attribute *da, @@ -357,28 +297,29 @@ static const struct attribute_group adt7x10_group = { }; int adt7x10_probe(struct device *dev, const char *name, int irq, - const struct adt7x10_ops *ops) + struct regmap *regmap) { struct adt7x10_data *data; + unsigned int config; int ret; data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; - data->ops = ops; + data->regmap = regmap; data->name = name; dev_set_drvdata(dev, data); mutex_init(&data->update_lock); /* configure as specified */ - ret = adt7x10_read_byte(dev, ADT7X10_CONFIG); + ret = regmap_read(regmap, ADT7X10_CONFIG, &config); if (ret < 0) { dev_dbg(dev, "Can't read config? %d\n", ret); return ret; } - data->oldconfig = ret; + data->oldconfig = config; /* * Set to 16 bit resolution, continous conversion and comparator mode. 
@@ -389,16 +330,12 @@ int adt7x10_probe(struct device *dev, const char *name, int irq, data->config |= ADT7X10_FULL | ADT7X10_RESOLUTION | ADT7X10_EVENT_MODE; if (data->config != data->oldconfig) { - ret = adt7x10_write_byte(dev, ADT7X10_CONFIG, data->config); + ret = regmap_write(regmap, ADT7X10_CONFIG, data->config); if (ret) return ret; } dev_dbg(dev, "Config %02x\n", data->config); - ret = adt7x10_fill_cache(dev); - if (ret) - goto exit_restore; - /* Register sysfs hooks */ ret = sysfs_create_group(&dev->kobj, &adt7x10_group); if (ret) @@ -439,7 +376,7 @@ exit_remove_name: exit_remove: sysfs_remove_group(&dev->kobj, &adt7x10_group); exit_restore: - adt7x10_write_byte(dev, ADT7X10_CONFIG, data->oldconfig); + regmap_write(regmap, ADT7X10_CONFIG, data->oldconfig); return ret; } EXPORT_SYMBOL_GPL(adt7x10_probe); @@ -456,7 +393,7 @@ void adt7x10_remove(struct device *dev, int irq) device_remove_file(dev, &dev_attr_name); sysfs_remove_group(&dev->kobj, &adt7x10_group); if (data->oldconfig != data->config) - adt7x10_write_byte(dev, ADT7X10_CONFIG, data->oldconfig); + regmap_write(data->regmap, ADT7X10_CONFIG, data->oldconfig); } EXPORT_SYMBOL_GPL(adt7x10_remove); @@ -466,15 +403,15 @@ static int adt7x10_suspend(struct device *dev) { struct adt7x10_data *data = dev_get_drvdata(dev); - return adt7x10_write_byte(dev, ADT7X10_CONFIG, - data->config | ADT7X10_PD); + return regmap_write(data->regmap, ADT7X10_CONFIG, + data->config | ADT7X10_PD); } static int adt7x10_resume(struct device *dev) { struct adt7x10_data *data = dev_get_drvdata(dev); - return adt7x10_write_byte(dev, ADT7X10_CONFIG, data->config); + return regmap_write(data->regmap, ADT7X10_CONFIG, data->config); } SIMPLE_DEV_PM_OPS(adt7x10_dev_pm_ops, adt7x10_suspend, adt7x10_resume); diff --git a/drivers/hwmon/adt7x10.h b/drivers/hwmon/adt7x10.h index a1ae682eb32e6..55ff08bfe9462 100644 --- a/drivers/hwmon/adt7x10.h +++ b/drivers/hwmon/adt7x10.h @@ -17,15 +17,9 @@ struct device; -struct adt7x10_ops { - int 
(*read_byte)(struct device *, u8 reg); - int (*write_byte)(struct device *, u8 reg, u8 data); - int (*read_word)(struct device *, u8 reg); - int (*write_word)(struct device *, u8 reg, u16 data); -}; - int adt7x10_probe(struct device *dev, const char *name, int irq, - const struct adt7x10_ops *ops); + struct regmap *regmap); + void adt7x10_remove(struct device *dev, int irq); #ifdef CONFIG_PM_SLEEP -- GitLab From af910e92886c35461ccc5ea234e17f448a46b876 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Tue, 21 Dec 2021 23:58:36 +0200 Subject: [PATCH 0750/1586] hwmon: (adt7x10) Add device managed action for restoring config To simplify the core driver remove function. Signed-off-by: Cosmin Tanislav Link: https://lore.kernel.org/r/20211221215841.2641417-3-demonsingur@gmail.com [groeck: Adjust to use regmap; only register restore function if needed] Tested-by: Cosmin Tanislav Reviewed-by: Cosmin Tanislav Signed-off-by: Guenter Roeck --- drivers/hwmon/adt7x10.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/hwmon/adt7x10.c b/drivers/hwmon/adt7x10.c index 05dd48b707b44..47ce1a88a03fa 100644 --- a/drivers/hwmon/adt7x10.c +++ b/drivers/hwmon/adt7x10.c @@ -296,6 +296,13 @@ static const struct attribute_group adt7x10_group = { .attrs = adt7x10_attributes, }; +static void adt7x10_restore_config(void *private) +{ + struct adt7x10_data *data = private; + + regmap_write(data->regmap, ADT7X10_CONFIG, data->oldconfig); +} + int adt7x10_probe(struct device *dev, const char *name, int irq, struct regmap *regmap) { @@ -333,13 +340,16 @@ int adt7x10_probe(struct device *dev, const char *name, int irq, ret = regmap_write(regmap, ADT7X10_CONFIG, data->config); if (ret) return ret; + ret = devm_add_action_or_reset(dev, adt7x10_restore_config, data); + if (ret) + return ret; } dev_dbg(dev, "Config %02x\n", data->config); /* Register sysfs hooks */ ret = sysfs_create_group(&dev->kobj, &adt7x10_group); if (ret) - goto exit_restore; + return 
ret; /* * The I2C device will already have it's own 'name' attribute, but for @@ -375,8 +385,6 @@ exit_remove_name: device_remove_file(dev, &dev_attr_name); exit_remove: sysfs_remove_group(&dev->kobj, &adt7x10_group); -exit_restore: - regmap_write(regmap, ADT7X10_CONFIG, data->oldconfig); return ret; } EXPORT_SYMBOL_GPL(adt7x10_probe); @@ -392,8 +400,6 @@ void adt7x10_remove(struct device *dev, int irq) if (data->name) device_remove_file(dev, &dev_attr_name); sysfs_remove_group(&dev->kobj, &adt7x10_group); - if (data->oldconfig != data->config) - regmap_write(data->regmap, ADT7X10_CONFIG, data->oldconfig); } EXPORT_SYMBOL_GPL(adt7x10_remove); -- GitLab From a748d30c37bb7d238e5869f4f4d6037367493ebe Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Tue, 21 Dec 2021 23:58:37 +0200 Subject: [PATCH 0751/1586] hwmon: (adt7x10) Use devm_hwmon_device_register_with_info Describe the only available channel, implement read, write and is_visible callbacks. Also, pass name to core driver for the i2c device so that it can be used to register hwmon device. 
Signed-off-by: Cosmin Tanislav Link: https://lore.kernel.org/r/20211221215841.2641417-4-demonsingur@gmail.com [groeck: Adjusted to use regmap] Tested-by: Cosmin Tanislav Reviewed-by: Cosmin Tanislav Signed-off-by: Guenter Roeck --- drivers/hwmon/adt7410.c | 2 +- drivers/hwmon/adt7x10.c | 249 ++++++++++++++++++++-------------------- 2 files changed, 125 insertions(+), 126 deletions(-) diff --git a/drivers/hwmon/adt7410.c b/drivers/hwmon/adt7410.c index b1f4497dca051..81950a079c2f8 100644 --- a/drivers/hwmon/adt7410.c +++ b/drivers/hwmon/adt7410.c @@ -84,7 +84,7 @@ static int adt7410_i2c_probe(struct i2c_client *client) if (IS_ERR(regmap)) return PTR_ERR(regmap); - return adt7x10_probe(&client->dev, NULL, client->irq, regmap); + return adt7x10_probe(&client->dev, client->name, client->irq, regmap); } static int adt7410_i2c_remove(struct i2c_client *client) diff --git a/drivers/hwmon/adt7x10.c b/drivers/hwmon/adt7x10.c index 47ce1a88a03fa..9482fd8fff41f 100644 --- a/drivers/hwmon/adt7x10.c +++ b/drivers/hwmon/adt7x10.c @@ -8,12 +8,12 @@ * and adt7410.c from iio-staging by Sonic Zhang */ +#include #include #include #include #include #include -#include #include #include #include @@ -55,19 +55,24 @@ /* Each client has this additional data */ struct adt7x10_data { struct regmap *regmap; - const char *name; - struct device *hwmon_dev; struct mutex update_lock; u8 config; u8 oldconfig; bool valid; /* true if temperature valid */ }; -static const u8 ADT7X10_REG_TEMP[4] = { - ADT7X10_TEMPERATURE, /* input */ - ADT7X10_T_ALARM_HIGH, /* high */ - ADT7X10_T_ALARM_LOW, /* low */ - ADT7X10_T_CRIT, /* critical */ +enum { + adt7x10_temperature = 0, + adt7x10_t_alarm_high, + adt7x10_t_alarm_low, + adt7x10_t_crit, +}; + +static const u8 ADT7X10_REG_TEMP[] = { + [adt7x10_temperature] = ADT7X10_TEMPERATURE, /* input */ + [adt7x10_t_alarm_high] = ADT7X10_T_ALARM_HIGH, /* high */ + [adt7x10_t_alarm_low] = ADT7X10_T_ALARM_LOW, /* low */ + [adt7x10_t_crit] = ADT7X10_T_CRIT, /* critical */ 
}; static irqreturn_t adt7x10_irq_handler(int irq, void *private) @@ -127,18 +132,13 @@ static int ADT7X10_REG_TO_TEMP(struct adt7x10_data *data, s16 reg) /*-----------------------------------------------------------------------*/ -/* sysfs attributes for hwmon */ - -static ssize_t adt7x10_temp_show(struct device *dev, - struct device_attribute *da, char *buf) +static int adt7x10_temp_read(struct adt7x10_data *data, int index, long *val) { - struct sensor_device_attribute *attr = to_sensor_dev_attr(da); - struct adt7x10_data *data = dev_get_drvdata(dev); - unsigned int val; + unsigned int regval; int ret; mutex_lock(&data->update_lock); - if (attr->index == 0 && !data->valid) { + if (index == adt7x10_temperature && !data->valid) { /* wait for valid temperature */ ret = adt7x10_temp_ready(data->regmap); if (ret) { @@ -149,40 +149,27 @@ static ssize_t adt7x10_temp_show(struct device *dev, } mutex_unlock(&data->update_lock); - ret = regmap_read(data->regmap, ADT7X10_REG_TEMP[attr->index], &val); + ret = regmap_read(data->regmap, ADT7X10_REG_TEMP[index], ®val); if (ret) return ret; - return sprintf(buf, "%d\n", ADT7X10_REG_TO_TEMP(data, val)); + *val = ADT7X10_REG_TO_TEMP(data, regval); + return 0; } -static ssize_t adt7x10_temp_store(struct device *dev, - struct device_attribute *da, - const char *buf, size_t count) +static int adt7x10_temp_write(struct adt7x10_data *data, int index, long temp) { - struct sensor_device_attribute *attr = to_sensor_dev_attr(da); - struct adt7x10_data *data = dev_get_drvdata(dev); - int nr = attr->index; - long temp; int ret; - ret = kstrtol(buf, 10, &temp); - if (ret) - return ret; - mutex_lock(&data->update_lock); - ret = regmap_write(data->regmap, ADT7X10_REG_TEMP[nr], + ret = regmap_write(data->regmap, ADT7X10_REG_TEMP[index], ADT7X10_TEMP_TO_REG(temp)); mutex_unlock(&data->update_lock); - return ret ? 
: count; + return ret; } -static ssize_t adt7x10_t_hyst_show(struct device *dev, - struct device_attribute *da, char *buf) +static int adt7x10_hyst_read(struct adt7x10_data *data, int index, long *val) { - struct sensor_device_attribute *attr = to_sensor_dev_attr(da); - struct adt7x10_data *data = dev_get_drvdata(dev); - int nr = attr->index; int hyst, temp, ret; mutex_lock(&data->update_lock); @@ -192,7 +179,7 @@ static ssize_t adt7x10_t_hyst_show(struct device *dev, return ret; } - ret = regmap_read(data->regmap, ADT7X10_REG_TEMP[nr], &temp); + ret = regmap_read(data->regmap, ADT7X10_REG_TEMP[index], &temp); mutex_unlock(&data->update_lock); if (ret) return ret; @@ -203,24 +190,18 @@ static ssize_t adt7x10_t_hyst_show(struct device *dev, * hysteresis is stored as a 4 bit offset in the device, convert it * to an absolute value */ - if (nr == 2) /* min has positive offset, others have negative */ + /* min has positive offset, others have negative */ + if (index == adt7x10_t_alarm_low) hyst = -hyst; - return sprintf(buf, "%d\n", ADT7X10_REG_TO_TEMP(data, temp) - hyst); + *val = ADT7X10_REG_TO_TEMP(data, temp) - hyst; + return 0; } -static ssize_t adt7x10_t_hyst_store(struct device *dev, - struct device_attribute *da, - const char *buf, size_t count) +static int adt7x10_hyst_write(struct adt7x10_data *data, long hyst) { - struct adt7x10_data *data = dev_get_drvdata(dev); unsigned int regval; int limit, ret; - long hyst; - - ret = kstrtol(buf, 10, &hyst); - if (ret) - return ret; mutex_lock(&data->update_lock); @@ -237,14 +218,11 @@ static ssize_t adt7x10_t_hyst_store(struct device *dev, ret = regmap_write(data->regmap, ADT7X10_T_HYST, regval); abort: mutex_unlock(&data->update_lock); - return ret ? 
: count; + return ret; } -static ssize_t adt7x10_alarm_show(struct device *dev, - struct device_attribute *da, char *buf) +static int adt7x10_alarm_read(struct adt7x10_data *data, int index, long *val) { - struct sensor_device_attribute *attr = to_sensor_dev_attr(da); - struct adt7x10_data *data = dev_get_drvdata(dev); unsigned int status; int ret; @@ -252,48 +230,102 @@ static ssize_t adt7x10_alarm_show(struct device *dev, if (ret < 0) return ret; - return sprintf(buf, "%d\n", !!(status & attr->index)); + *val = !!(status & index); + + return 0; } -static ssize_t name_show(struct device *dev, struct device_attribute *da, - char *buf) +static umode_t adt7x10_is_visible(const void *data, + enum hwmon_sensor_types type, + u32 attr, int channel) +{ + switch (attr) { + case hwmon_temp_max: + case hwmon_temp_min: + case hwmon_temp_crit: + case hwmon_temp_max_hyst: + return 0644; + case hwmon_temp_input: + case hwmon_temp_min_alarm: + case hwmon_temp_max_alarm: + case hwmon_temp_crit_alarm: + case hwmon_temp_min_hyst: + case hwmon_temp_crit_hyst: + return 0444; + default: + break; + } + + return 0; +} + +static int adt7x10_read(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long *val) { struct adt7x10_data *data = dev_get_drvdata(dev); - return sprintf(buf, "%s\n", data->name); + switch (attr) { + case hwmon_temp_input: + return adt7x10_temp_read(data, adt7x10_temperature, val); + case hwmon_temp_max: + return adt7x10_temp_read(data, adt7x10_t_alarm_high, val); + case hwmon_temp_min: + return adt7x10_temp_read(data, adt7x10_t_alarm_low, val); + case hwmon_temp_crit: + return adt7x10_temp_read(data, adt7x10_t_crit, val); + case hwmon_temp_max_hyst: + return adt7x10_hyst_read(data, adt7x10_t_alarm_high, val); + case hwmon_temp_min_hyst: + return adt7x10_hyst_read(data, adt7x10_t_alarm_low, val); + case hwmon_temp_crit_hyst: + return adt7x10_hyst_read(data, adt7x10_t_crit, val); + case hwmon_temp_min_alarm: + return adt7x10_alarm_read(data, 
ADT7X10_STAT_T_LOW, val); + case hwmon_temp_max_alarm: + return adt7x10_alarm_read(data, ADT7X10_STAT_T_HIGH, val); + case hwmon_temp_crit_alarm: + return adt7x10_alarm_read(data, ADT7X10_STAT_T_CRIT, val); + default: + return -EOPNOTSUPP; + } } -static SENSOR_DEVICE_ATTR_RO(temp1_input, adt7x10_temp, 0); -static SENSOR_DEVICE_ATTR_RW(temp1_max, adt7x10_temp, 1); -static SENSOR_DEVICE_ATTR_RW(temp1_min, adt7x10_temp, 2); -static SENSOR_DEVICE_ATTR_RW(temp1_crit, adt7x10_temp, 3); -static SENSOR_DEVICE_ATTR_RW(temp1_max_hyst, adt7x10_t_hyst, 1); -static SENSOR_DEVICE_ATTR_RO(temp1_min_hyst, adt7x10_t_hyst, 2); -static SENSOR_DEVICE_ATTR_RO(temp1_crit_hyst, adt7x10_t_hyst, 3); -static SENSOR_DEVICE_ATTR_RO(temp1_min_alarm, adt7x10_alarm, - ADT7X10_STAT_T_LOW); -static SENSOR_DEVICE_ATTR_RO(temp1_max_alarm, adt7x10_alarm, - ADT7X10_STAT_T_HIGH); -static SENSOR_DEVICE_ATTR_RO(temp1_crit_alarm, adt7x10_alarm, - ADT7X10_STAT_T_CRIT); -static DEVICE_ATTR_RO(name); - -static struct attribute *adt7x10_attributes[] = { - &sensor_dev_attr_temp1_input.dev_attr.attr, - &sensor_dev_attr_temp1_max.dev_attr.attr, - &sensor_dev_attr_temp1_min.dev_attr.attr, - &sensor_dev_attr_temp1_crit.dev_attr.attr, - &sensor_dev_attr_temp1_max_hyst.dev_attr.attr, - &sensor_dev_attr_temp1_min_hyst.dev_attr.attr, - &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr, - &sensor_dev_attr_temp1_min_alarm.dev_attr.attr, - &sensor_dev_attr_temp1_max_alarm.dev_attr.attr, - &sensor_dev_attr_temp1_crit_alarm.dev_attr.attr, - NULL +static int adt7x10_write(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long val) +{ + struct adt7x10_data *data = dev_get_drvdata(dev); + + switch (attr) { + case hwmon_temp_max: + return adt7x10_temp_write(data, adt7x10_t_alarm_high, val); + case hwmon_temp_min: + return adt7x10_temp_write(data, adt7x10_t_alarm_low, val); + case hwmon_temp_crit: + return adt7x10_temp_write(data, adt7x10_t_crit, val); + case hwmon_temp_max_hyst: + return 
adt7x10_hyst_write(data, val); + default: + return -EOPNOTSUPP; + } +} + +static const struct hwmon_channel_info *adt7x10_info[] = { + HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN | + HWMON_T_CRIT | HWMON_T_MAX_HYST | HWMON_T_MIN_HYST | + HWMON_T_CRIT_HYST | HWMON_T_MIN_ALARM | + HWMON_T_MAX_ALARM | HWMON_T_CRIT_ALARM), + NULL, +}; + +static const struct hwmon_ops adt7x10_hwmon_ops = { + .is_visible = adt7x10_is_visible, + .read = adt7x10_read, + .write = adt7x10_write, }; -static const struct attribute_group adt7x10_group = { - .attrs = adt7x10_attributes, +static const struct hwmon_chip_info adt7x10_chip_info = { + .ops = &adt7x10_hwmon_ops, + .info = adt7x10_info, }; static void adt7x10_restore_config(void *private) @@ -308,6 +340,7 @@ int adt7x10_probe(struct device *dev, const char *name, int irq, { struct adt7x10_data *data; unsigned int config; + struct device *hdev; int ret; data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); @@ -315,7 +348,6 @@ int adt7x10_probe(struct device *dev, const char *name, int irq, return -ENOMEM; data->regmap = regmap; - data->name = name; dev_set_drvdata(dev, data); mutex_init(&data->update_lock); @@ -346,60 +378,27 @@ int adt7x10_probe(struct device *dev, const char *name, int irq, } dev_dbg(dev, "Config %02x\n", data->config); - /* Register sysfs hooks */ - ret = sysfs_create_group(&dev->kobj, &adt7x10_group); - if (ret) - return ret; - - /* - * The I2C device will already have it's own 'name' attribute, but for - * the SPI device we need to register it. name will only be non NULL if - * the device doesn't register the 'name' attribute on its own. 
- */ - if (name) { - ret = device_create_file(dev, &dev_attr_name); - if (ret) - goto exit_remove; - } - - data->hwmon_dev = hwmon_device_register(dev); - if (IS_ERR(data->hwmon_dev)) { - ret = PTR_ERR(data->hwmon_dev); - goto exit_remove_name; - } + hdev = devm_hwmon_device_register_with_info(dev, name, data, + &adt7x10_chip_info, NULL); + if (IS_ERR(hdev)) + return PTR_ERR(hdev); if (irq > 0) { ret = request_threaded_irq(irq, NULL, adt7x10_irq_handler, IRQF_TRIGGER_FALLING | IRQF_ONESHOT, dev_name(dev), dev); if (ret) - goto exit_hwmon_device_unregister; + return ret; } return 0; - -exit_hwmon_device_unregister: - hwmon_device_unregister(data->hwmon_dev); -exit_remove_name: - if (name) - device_remove_file(dev, &dev_attr_name); -exit_remove: - sysfs_remove_group(&dev->kobj, &adt7x10_group); - return ret; } EXPORT_SYMBOL_GPL(adt7x10_probe); void adt7x10_remove(struct device *dev, int irq) { - struct adt7x10_data *data = dev_get_drvdata(dev); - if (irq > 0) free_irq(irq, dev); - - hwmon_device_unregister(data->hwmon_dev); - if (data->name) - device_remove_file(dev, &dev_attr_name); - sysfs_remove_group(&dev->kobj, &adt7x10_group); } EXPORT_SYMBOL_GPL(adt7x10_remove); -- GitLab From f691adc3e95ecd6ae01830f7f6140dce3bd730a2 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Tue, 21 Dec 2021 23:58:38 +0200 Subject: [PATCH 0752/1586] hwmon: (adt7x10) Use devm_request_threaded_irq To simplify the core driver remove function. 
Signed-off-by: Cosmin Tanislav Link: https://lore.kernel.org/r/20211221215841.2641417-5-demonsingur@gmail.com Tested-by: Cosmin Tanislav Reviewed-by: Cosmin Tanislav Signed-off-by: Guenter Roeck --- drivers/hwmon/adt7x10.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/hwmon/adt7x10.c b/drivers/hwmon/adt7x10.c index 9482fd8fff41f..147c28b241675 100644 --- a/drivers/hwmon/adt7x10.c +++ b/drivers/hwmon/adt7x10.c @@ -384,9 +384,11 @@ int adt7x10_probe(struct device *dev, const char *name, int irq, return PTR_ERR(hdev); if (irq > 0) { - ret = request_threaded_irq(irq, NULL, adt7x10_irq_handler, - IRQF_TRIGGER_FALLING | IRQF_ONESHOT, - dev_name(dev), dev); + ret = devm_request_threaded_irq(dev, irq, NULL, + adt7x10_irq_handler, + IRQF_TRIGGER_FALLING | + IRQF_ONESHOT, + dev_name(dev), dev); if (ret) return ret; } @@ -397,8 +399,6 @@ EXPORT_SYMBOL_GPL(adt7x10_probe); void adt7x10_remove(struct device *dev, int irq) { - if (irq > 0) - free_irq(irq, dev); } EXPORT_SYMBOL_GPL(adt7x10_remove); -- GitLab From 8331585ab3700662c0979244fb06522aa0d36fbc Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Tue, 21 Dec 2021 23:58:39 +0200 Subject: [PATCH 0753/1586] hwmon: (adt7x10) Remove empty driver removal callback Not used to do anything anymore. 
Signed-off-by: Cosmin Tanislav Link: https://lore.kernel.org/r/20211221215841.2641417-6-demonsingur@gmail.com Tested-by: Cosmin Tanislav Reviewed-by: Cosmin Tanislav Signed-off-by: Guenter Roeck --- drivers/hwmon/adt7310.c | 7 ------- drivers/hwmon/adt7410.c | 7 ------- drivers/hwmon/adt7x10.c | 5 ----- drivers/hwmon/adt7x10.h | 2 -- 4 files changed, 21 deletions(-) diff --git a/drivers/hwmon/adt7310.c b/drivers/hwmon/adt7310.c index a83092470bce4..1efc0bdcceabd 100644 --- a/drivers/hwmon/adt7310.c +++ b/drivers/hwmon/adt7310.c @@ -142,12 +142,6 @@ static int adt7310_spi_probe(struct spi_device *spi) regmap); } -static int adt7310_spi_remove(struct spi_device *spi) -{ - adt7x10_remove(&spi->dev, spi->irq); - return 0; -} - static const struct spi_device_id adt7310_id[] = { { "adt7310", 0 }, { "adt7320", 0 }, @@ -161,7 +155,6 @@ static struct spi_driver adt7310_driver = { .pm = ADT7X10_DEV_PM_OPS, }, .probe = adt7310_spi_probe, - .remove = adt7310_spi_remove, .id_table = adt7310_id, }; module_spi_driver(adt7310_driver); diff --git a/drivers/hwmon/adt7410.c b/drivers/hwmon/adt7410.c index 81950a079c2f8..aede5baca7b9f 100644 --- a/drivers/hwmon/adt7410.c +++ b/drivers/hwmon/adt7410.c @@ -87,12 +87,6 @@ static int adt7410_i2c_probe(struct i2c_client *client) return adt7x10_probe(&client->dev, client->name, client->irq, regmap); } -static int adt7410_i2c_remove(struct i2c_client *client) -{ - adt7x10_remove(&client->dev, client->irq); - return 0; -} - static const struct i2c_device_id adt7410_ids[] = { { "adt7410", 0 }, { "adt7420", 0 }, @@ -107,7 +101,6 @@ static struct i2c_driver adt7410_driver = { .pm = ADT7X10_DEV_PM_OPS, }, .probe_new = adt7410_i2c_probe, - .remove = adt7410_i2c_remove, .id_table = adt7410_ids, .address_list = I2C_ADDRS(0x48, 0x49, 0x4a, 0x4b), }; diff --git a/drivers/hwmon/adt7x10.c b/drivers/hwmon/adt7x10.c index 147c28b241675..ea8cd918bc226 100644 --- a/drivers/hwmon/adt7x10.c +++ b/drivers/hwmon/adt7x10.c @@ -397,11 +397,6 @@ int 
adt7x10_probe(struct device *dev, const char *name, int irq, } EXPORT_SYMBOL_GPL(adt7x10_probe); -void adt7x10_remove(struct device *dev, int irq) -{ -} -EXPORT_SYMBOL_GPL(adt7x10_remove); - #ifdef CONFIG_PM_SLEEP static int adt7x10_suspend(struct device *dev) diff --git a/drivers/hwmon/adt7x10.h b/drivers/hwmon/adt7x10.h index 55ff08bfe9462..ba22c32c83552 100644 --- a/drivers/hwmon/adt7x10.h +++ b/drivers/hwmon/adt7x10.h @@ -20,8 +20,6 @@ struct device; int adt7x10_probe(struct device *dev, const char *name, int irq, struct regmap *regmap); -void adt7x10_remove(struct device *dev, int irq); - #ifdef CONFIG_PM_SLEEP extern const struct dev_pm_ops adt7x10_dev_pm_ops; #define ADT7X10_DEV_PM_OPS (&adt7x10_dev_pm_ops) -- GitLab From a7a5731a09521a1053bb409647bf3e52cdf350de Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Tue, 21 Dec 2021 23:58:41 +0200 Subject: [PATCH 0754/1586] hwmon: (adt7x10) Use hwmon_notify_event The hwmon subsystem provides means of notifying userspace about events. Use it. 
Signed-off-by: Cosmin Tanislav Link: https://lore.kernel.org/r/20211221215841.2641417-8-demonsingur@gmail.com [groeck: Pass hwmon device to interrupt handler] Tested-by: Cosmin Tanislav Reviewed-by: Cosmin Tanislav Signed-off-by: Guenter Roeck --- drivers/hwmon/adt7x10.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/hwmon/adt7x10.c b/drivers/hwmon/adt7x10.c index ea8cd918bc226..ce54bffab2ec2 100644 --- a/drivers/hwmon/adt7x10.c +++ b/drivers/hwmon/adt7x10.c @@ -87,11 +87,11 @@ static irqreturn_t adt7x10_irq_handler(int irq, void *private) return IRQ_HANDLED; if (status & ADT7X10_STAT_T_HIGH) - sysfs_notify(&dev->kobj, NULL, "temp1_max_alarm"); + hwmon_notify_event(dev, hwmon_temp, hwmon_temp_max_alarm, 0); if (status & ADT7X10_STAT_T_LOW) - sysfs_notify(&dev->kobj, NULL, "temp1_min_alarm"); + hwmon_notify_event(dev, hwmon_temp, hwmon_temp_min_alarm, 0); if (status & ADT7X10_STAT_T_CRIT) - sysfs_notify(&dev->kobj, NULL, "temp1_crit_alarm"); + hwmon_notify_event(dev, hwmon_temp, hwmon_temp_crit_alarm, 0); return IRQ_HANDLED; } @@ -388,7 +388,7 @@ int adt7x10_probe(struct device *dev, const char *name, int irq, adt7x10_irq_handler, IRQF_TRIGGER_FALLING | IRQF_ONESHOT, - dev_name(dev), dev); + dev_name(dev), hdev); if (ret) return ret; } -- GitLab From f545a2fd473606809cb4a5045919a10194d86d36 Mon Sep 17 00:00:00 2001 From: Eugene Shalygin Date: Tue, 8 Feb 2022 10:42:43 +0100 Subject: [PATCH 0755/1586] hwmon: (asus-ec-sensors) add CPU core voltage A user discovered [1] the CPU Core voltage sensor, which spans 2 registers and provides output in mV. Although the discovery was made with an X470 chipset, the sensor is present in X570 (tested with C8H). For now simply add it to each board with the CPU current sensor present. 
[1] https://github.com/zeule/asus-ec-sensors/issues/12 Signed-off-by: Eugene Shalygin Tested-by: Oleksandr Natalenko Tested-by: Denis Pauk Link: https://lore.kernel.org/r/20220208094244.1106312-1-eugene.shalygin@gmail.com Signed-off-by: Guenter Roeck --- Documentation/hwmon/asus_ec_sensors.rst | 1 + drivers/hwmon/asus-ec-sensors.c | 29 ++++++++++++++++--------- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/Documentation/hwmon/asus_ec_sensors.rst b/Documentation/hwmon/asus_ec_sensors.rst index 22de1b037cfba..e7e8f1640f457 100644 --- a/Documentation/hwmon/asus_ec_sensors.rst +++ b/Documentation/hwmon/asus_ec_sensors.rst @@ -39,6 +39,7 @@ The driver is aware of and reads the following sensors: 9. Readings from the "Water flow meter" header (RPM) 10. Readings from the "Water In" and "Water Out" temperature headers 11. CPU current +12. CPU core voltage Sensor values are read from EC registers, and to avoid race with the board firmware the driver acquires ACPI mutex, the one used by the WMI when its diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c index dd7b207d062fd..bfac08a5dc57d 100644 --- a/drivers/hwmon/asus-ec-sensors.c +++ b/drivers/hwmon/asus-ec-sensors.c @@ -18,6 +18,7 @@ * - VRM Heat Sink fan RPM * - Water Flow fan RPM * - CPU current + * - CPU core voltage */ #include @@ -100,6 +101,8 @@ enum ec_sensors { ec_sensor_temp_t_sensor, /* VRM temperature [℃] */ ec_sensor_temp_vrm, + /* CPU Core voltage [mV] */ + ec_sensor_in_cpu_core, /* CPU_Opt fan [RPM] */ ec_sensor_fan_cpu_opt, /* VRM heat sink fan [RPM] */ @@ -121,6 +124,7 @@ enum ec_sensors { #define SENSOR_TEMP_MB BIT(ec_sensor_temp_mb) #define SENSOR_TEMP_T_SENSOR BIT(ec_sensor_temp_t_sensor) #define SENSOR_TEMP_VRM BIT(ec_sensor_temp_vrm) +#define SENSOR_IN_CPU_CORE BIT(ec_sensor_in_cpu_core) #define SENSOR_FAN_CPU_OPT BIT(ec_sensor_fan_cpu_opt) #define SENSOR_FAN_VRM_HS BIT(ec_sensor_fan_vrm_hs) #define SENSOR_FAN_CHIPSET BIT(ec_sensor_fan_chipset) @@ -139,6 
+143,8 @@ static const struct ec_sensor_info known_ec_sensors[] = { [ec_sensor_temp_t_sensor] = EC_SENSOR("T_Sensor", hwmon_temp, 1, 0x00, 0x3d), [ec_sensor_temp_vrm] = EC_SENSOR("VRM", hwmon_temp, 1, 0x00, 0x3e), + [ec_sensor_in_cpu_core] = + EC_SENSOR("CPU Core", hwmon_in, 2, 0x00, 0xa2), [ec_sensor_fan_cpu_opt] = EC_SENSOR("CPU_Opt", hwmon_fan, 2, 0x00, 0xb0), [ec_sensor_fan_vrm_hs] = EC_SENSOR("VRM HS", hwmon_fan, 2, 0x00, 0xb2), @@ -172,32 +178,34 @@ static const struct dmi_system_id asus_ec_dmi_table[] __initconst = { SENSOR_TEMP_T_SENSOR | SENSOR_FAN_CHIPSET), DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "Pro WS X570-ACE", SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_VRM | - SENSOR_FAN_CHIPSET | SENSOR_CURR_CPU), + SENSOR_FAN_CHIPSET | SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE), DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG CROSSHAIR VIII DARK HERO", SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR | SENSOR_TEMP_VRM | SENSOR_SET_TEMP_WATER | - SENSOR_FAN_CPU_OPT | SENSOR_FAN_WATER_FLOW | SENSOR_CURR_CPU), + SENSOR_FAN_CPU_OPT | SENSOR_FAN_WATER_FLOW | + SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE), DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG CROSSHAIR VIII FORMULA", SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR | SENSOR_TEMP_VRM | SENSOR_FAN_CPU_OPT | SENSOR_FAN_CHIPSET | - SENSOR_CURR_CPU), + SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE), DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG CROSSHAIR VIII HERO", SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR | SENSOR_TEMP_VRM | SENSOR_SET_TEMP_WATER | SENSOR_FAN_CPU_OPT | SENSOR_FAN_CHIPSET | - SENSOR_FAN_WATER_FLOW | SENSOR_CURR_CPU), + SENSOR_FAN_WATER_FLOW | SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE), DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG CROSSHAIR VIII HERO (WI-FI)", SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR | SENSOR_TEMP_VRM | SENSOR_SET_TEMP_WATER | SENSOR_FAN_CPU_OPT | SENSOR_FAN_CHIPSET | - SENSOR_FAN_WATER_FLOW | SENSOR_CURR_CPU), + SENSOR_FAN_WATER_FLOW | 
SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE), DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG CROSSHAIR VIII IMPACT", SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR | - SENSOR_TEMP_VRM | SENSOR_FAN_CHIPSET | SENSOR_CURR_CPU), + SENSOR_TEMP_VRM | SENSOR_FAN_CHIPSET | + SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE), DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX B550-E GAMING", SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR | @@ -205,17 +213,19 @@ static const struct dmi_system_id asus_ec_dmi_table[] __initconst = { DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX B550-I GAMING", SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR | - SENSOR_TEMP_VRM | SENSOR_FAN_VRM_HS | SENSOR_CURR_CPU), + SENSOR_TEMP_VRM | SENSOR_FAN_VRM_HS | + SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE), DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX X570-E GAMING", SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR | - SENSOR_TEMP_VRM | SENSOR_FAN_CHIPSET | SENSOR_CURR_CPU), + SENSOR_TEMP_VRM | SENSOR_FAN_CHIPSET | + SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE), DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX X570-F GAMING", SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR | SENSOR_FAN_CHIPSET), DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX X570-I GAMING", SENSOR_TEMP_T_SENSOR | SENSOR_FAN_VRM_HS | - SENSOR_FAN_CHIPSET | SENSOR_CURR_CPU), + SENSOR_FAN_CHIPSET | SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE), {} }; @@ -467,7 +477,6 @@ static long scale_sensor_value(s32 value, int data_type) switch (data_type) { case hwmon_curr: case hwmon_temp: - case hwmon_in: return value * MILLI; default: return value; -- GitLab From a7a6f65a39a75a5821eb9aad157326c30f8bbb07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Jo=C5=84czyk?= Date: Sat, 12 Feb 2022 13:56:53 +0100 Subject: [PATCH 0756/1586] x86/Kconfig: move and modify CONFIG_I8K MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In Kconfig, inside the "Processor type and 
features" menu, there is the CONFIG_I8K option: "Dell i8k legacy laptop support". This is very confusing - enabling CONFIG_I8K is not required for the kernel to support old Dell laptops. This option is specific to the dell-smm-hwmon driver, which mostly exports some hardware monitoring information and allows the user to change fan speed. This option is misplaced, so move CONFIG_I8K to drivers/hwmon/Kconfig, where it belongs. Also, modify the dependency order - change select SENSORS_DELL_SMM to depends on SENSORS_DELL_SMM as it is just a configuration option of dell-smm-hwmon. This includes changing the option type from tristate to bool. It was tristate because it could select CONFIG_SENSORS_DELL_SMM=m . When running "make oldconfig" on configurations with CONFIG_SENSORS_DELL_SMM enabled , this change will result in an additional question (which could be printed several times during bisecting). I think that tidying up the configuration is worth it, though. Next patch tweaks the description of CONFIG_I8K. Signed-off-by: Mateusz Jończyk Cc: Pali Rohár Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Dave Hansen Cc: Jean Delvare Cc: Guenter Roeck Cc: Mark Gross Reviewed-by: Hans de Goede Reviewed-by: Randy Dunlap Acked-by: Borislav Petkov Link: https://lore.kernel.org/r/20220212125654.357408-1-mat.jonczyk@o2.pl Signed-off-by: Guenter Roeck --- arch/x86/Kconfig | 17 ----------------- drivers/hwmon/Kconfig | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9f5bd41bf660c..71d4ddd48c027 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1275,23 +1275,6 @@ config TOSHIBA Say Y if you intend to run this kernel on a Toshiba portable. Say N otherwise. -config I8K - tristate "Dell i8k legacy laptop support" - depends on HWMON - depends on PROC_FS - select SENSORS_DELL_SMM - help - This option enables legacy /proc/i8k userspace interface in hwmon - dell-smm-hwmon driver. 
Character file /proc/i8k reports bios version, - temperature and allows controlling fan speeds of Dell laptops via - System Management Mode. For old Dell laptops (like Dell Inspiron 8000) - it reports also power and hotkey status. For fan speed control is - needed userspace package i8kutils. - - Say Y if you intend to run this kernel on old Dell laptops or want to - use userspace package i8kutils. - Say N otherwise. - config X86_REBOOTFIXUPS bool "Enable X86 board specific fixups for reboot" depends on X86_32 diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 387253ae6c149..fceea537101de 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -506,6 +506,22 @@ config SENSORS_DELL_SMM When option I8K is also enabled this driver provides legacy /proc/i8k userspace interface for i8kutils package. +config I8K + bool "Dell i8k legacy laptop support" + depends on SENSORS_DELL_SMM + depends on PROC_FS + help + This option enables legacy /proc/i8k userspace interface in hwmon + dell-smm-hwmon driver. Character file /proc/i8k reports bios version, + temperature and allows controlling fan speeds of Dell laptops via + System Management Mode. For old Dell laptops (like Dell Inspiron 8000) + it reports also power and hotkey status. For fan speed control is + needed userspace package i8kutils. + + Say Y if you intend to run this kernel on old Dell laptops or want to + use userspace package i8kutils. + Say N otherwise. + config SENSORS_DA9052_ADC tristate "Dialog DA9052/DA9053 ADC" depends on PMIC_DA9052 -- GitLab From 5f86cce61c1d12219b2ea25b9404bf263a77dd72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Jo=C5=84czyk?= Date: Sat, 12 Feb 2022 13:56:54 +0100 Subject: [PATCH 0757/1586] hwmon: (dell-smm) rewrite CONFIG_I8K description MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is not the laptops, but the /proc/i8k interface that is legacy (or so I think was the intention of the help text author). 
The old description was confusing, fix this. The phrase "Say Y if you intend to run this kernel on old Dell laptops or want to use userspace package i8kutils." was introduced in 2015, in commit 039ae58503f3 ("hwmon: Allow to compile dell-smm-hwmon driver without /proc/i8k") I think that "old laptops" was about hotkey and Fn key support - this driver in the 2.4 kernels' era apparently had these capabilities (see: https://github.com/vitorafsr/i8kutils , description of "repeat_rate" kernel module parameter). Signed-off-by: Mateusz Jończyk Cc: Pali Rohár Cc: Jean Delvare Cc: Guenter Roeck Cc: Mark Gross Reviewed-by: Hans de Goede Reviewed-by: Randy Dunlap Link: https://lore.kernel.org/r/20220212125654.357408-2-mat.jonczyk@o2.pl Signed-off-by: Guenter Roeck --- drivers/hwmon/Kconfig | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index fceea537101de..ce9149e0648f9 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -507,19 +507,18 @@ config SENSORS_DELL_SMM userspace interface for i8kutils package. config I8K - bool "Dell i8k legacy laptop support" + bool "Legacy /proc/i8k interface of Dell laptop SMM BIOS hwmon driver" depends on SENSORS_DELL_SMM depends on PROC_FS help - This option enables legacy /proc/i8k userspace interface in hwmon - dell-smm-hwmon driver. Character file /proc/i8k reports bios version, - temperature and allows controlling fan speeds of Dell laptops via - System Management Mode. For old Dell laptops (like Dell Inspiron 8000) - it reports also power and hotkey status. For fan speed control is - needed userspace package i8kutils. + This option enables the legacy /proc/i8k userspace interface of the + dell-smm-hwmon driver. The character file /proc/i8k exposes the BIOS + version, temperatures and allows control of fan speeds of some Dell + laptops. Sometimes it also reports power and hotkey status. 
- Say Y if you intend to run this kernel on old Dell laptops or want to - use userspace package i8kutils. + This interface is required to run programs from the i8kutils package. + + Say Y if you intend to run userspace programs that use this interface. Say N otherwise. config SENSORS_DA9052_ADC -- GitLab From 99cb5e9f7a78857657220f65533dce550331d629 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Sun, 13 Feb 2022 01:47:33 +0100 Subject: [PATCH 0758/1586] hwmon: (tc654) Add thermal_cooling device support Adds thermal_cooling device support to the tc654/tc655 driver. This makes it possible to integrate it into a device-tree supported thermal-zone node as a cooling device. I have been using this patch as part of the Netgear WNDR4700 Centria NAS Router support within OpenWrt since 2016. Signed-off-by: Christian Lamparter Link: https://lore.kernel.org/r/20220213004733.2421193-1-chunkeey@gmail.com Signed-off-by: Guenter Roeck --- drivers/hwmon/tc654.c | 104 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 89 insertions(+), 15 deletions(-) diff --git a/drivers/hwmon/tc654.c b/drivers/hwmon/tc654.c index a52ca72af1201..54cd33d096884 100644 --- a/drivers/hwmon/tc654.c +++ b/drivers/hwmon/tc654.c @@ -15,6 +15,7 @@ #include #include #include +#include #include enum tc654_regs { @@ -379,28 +380,20 @@ static ssize_t pwm_show(struct device *dev, struct device_attribute *da, return sprintf(buf, "%d\n", pwm); } -static ssize_t pwm_store(struct device *dev, struct device_attribute *da, - const char *buf, size_t count) +static int _set_pwm(struct tc654_data *data, unsigned long val) { - struct tc654_data *data = dev_get_drvdata(dev); struct i2c_client *client = data->client; - unsigned long val; int ret; - if (kstrtoul(buf, 10, &val)) - return -EINVAL; - if (val > 255) - return -EINVAL; - mutex_lock(&data->update_lock); - if (val == 0) + if (val == 0) { data->config |= TC654_REG_CONFIG_SDM; - else + data->duty_cycle = 0; + } else { data->config &=
~TC654_REG_CONFIG_SDM; - - data->duty_cycle = find_closest(val, tc654_pwm_map, - ARRAY_SIZE(tc654_pwm_map)); + data->duty_cycle = val - 1; + } ret = i2c_smbus_write_byte_data(client, TC654_REG_CONFIG, data->config); if (ret < 0) @@ -411,6 +404,24 @@ static ssize_t pwm_store(struct device *dev, struct device_attribute *da, out: mutex_unlock(&data->update_lock); + return ret; +} + +static ssize_t pwm_store(struct device *dev, struct device_attribute *da, + const char *buf, size_t count) +{ + struct tc654_data *data = dev_get_drvdata(dev); + unsigned long val; + int ret; + + if (kstrtoul(buf, 10, &val)) + return -EINVAL; + if (val > 255) + return -EINVAL; + if (val > 0) + val = find_closest(val, tc654_pwm_map, ARRAY_SIZE(tc654_pwm_map)) + 1; + + ret = _set_pwm(data, val); return ret < 0 ? ret : count; } @@ -442,6 +453,58 @@ static struct attribute *tc654_attrs[] = { ATTRIBUTE_GROUPS(tc654); +/* + * thermal cooling device functions + * + * Account for the "ShutDown Mode (SDM)" state by offsetting + * the 16 PWM duty cycle states by 1. + * + * State 0 = 0% PWM | Shutdown - Fan(s) are off + * State 1 = 30% PWM | duty_cycle = 0 + * State 2 = ~35% PWM | duty_cycle = 1 + * [...] 
+ * State 15 = ~95% PWM | duty_cycle = 14 + * State 16 = 100% PWM | duty_cycle = 15 + */ +#define TC654_MAX_COOLING_STATE 16 + +static int tc654_get_max_state(struct thermal_cooling_device *cdev, unsigned long *state) +{ + *state = TC654_MAX_COOLING_STATE; + return 0; +} + +static int tc654_get_cur_state(struct thermal_cooling_device *cdev, unsigned long *state) +{ + struct tc654_data *data = tc654_update_client(cdev->devdata); + + if (IS_ERR(data)) + return PTR_ERR(data); + + if (data->config & TC654_REG_CONFIG_SDM) + *state = 0; /* FAN is off */ + else + *state = data->duty_cycle + 1; /* offset PWM States by 1 */ + + return 0; +} + +static int tc654_set_cur_state(struct thermal_cooling_device *cdev, unsigned long state) +{ + struct tc654_data *data = tc654_update_client(cdev->devdata); + + if (IS_ERR(data)) + return PTR_ERR(data); + + return _set_pwm(data, clamp_val(state, 0, TC654_MAX_COOLING_STATE)); +} + +static const struct thermal_cooling_device_ops tc654_fan_cool_ops = { + .get_max_state = tc654_get_max_state, + .get_cur_state = tc654_get_cur_state, + .set_cur_state = tc654_set_cur_state, +}; + /* * device probe and removal */ @@ -472,7 +535,18 @@ static int tc654_probe(struct i2c_client *client) hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name, data, tc654_groups); - return PTR_ERR_OR_ZERO(hwmon_dev); + if (IS_ERR(hwmon_dev)) + return PTR_ERR(hwmon_dev); + + if (IS_ENABLED(CONFIG_THERMAL)) { + struct thermal_cooling_device *cdev; + + cdev = devm_thermal_of_cooling_device_register(dev, dev->of_node, client->name, + hwmon_dev, &tc654_fan_cool_ops); + return PTR_ERR_OR_ZERO(cdev); + } + + return 0; } static const struct i2c_device_id tc654_id[] = { -- GitLab From 8aba9ca62677570abf8c6b62611adb85bf1580ca Mon Sep 17 00:00:00 2001 From: Eugene Shalygin Date: Fri, 11 Feb 2022 17:48:55 +0100 Subject: [PATCH 0759/1586] hwmon: (asus-ec-sensors) deduce sensor signedness from its type Reading DSDT code for ASUS X470-based boards (the ones served by 
the asus_wmi_Sensors driver), where ASUS put hardware monitoring functions into the WMI code, reveals that fan and current sensors data is unsigned. For the current sensor that was confirmed by a user who showed high enough current value for overflow. Thus let's assume that the signedness of the sensors is determined by its type and that only temperature ones provide signed numbers. Signed-off-by: Eugene Shalygin Link: https://lore.kernel.org/r/20220211164855.265698-1-eugene.shalygin@gmail.com Signed-off-by: Guenter Roeck --- drivers/hwmon/asus-ec-sensors.c | 40 +++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c index bfac08a5dc57d..d2b84578d2af2 100644 --- a/drivers/hwmon/asus-ec-sensors.c +++ b/drivers/hwmon/asus-ec-sensors.c @@ -266,6 +266,15 @@ static u8 register_index(u16 reg) return reg & 0x00ff; } +static bool is_sensor_data_signed(const struct ec_sensor_info *si) +{ + /* + * guessed from WMI functions in DSDT code for boards + * of the X470 generation + */ + return si->type == hwmon_temp; +} + static const struct ec_sensor_info * get_sensor_info(const struct ec_sensors_data *state, int index) { @@ -420,15 +429,28 @@ static int asus_ec_block_read(const struct device *dev, static inline s32 get_sensor_value(const struct ec_sensor_info *si, u8 *data) { - switch (si->addr.components.size) { - case 1: - return (s8)*data; - case 2: - return (s16)get_unaligned_be16(data); - case 4: - return (s32)get_unaligned_be32(data); - default: - return 0; + if (is_sensor_data_signed(si)) { + switch (si->addr.components.size) { + case 1: + return (s8)*data; + case 2: + return (s16)get_unaligned_be16(data); + case 4: + return (s32)get_unaligned_be32(data); + default: + return 0; + } + } else { + switch (si->addr.components.size) { + case 1: + return *data; + case 2: + return get_unaligned_be16(data); + case 4: + return get_unaligned_be32(data); + default: + return 0; + 
} } } -- GitLab From 1298184b387fd71fe68ecb7dc2001cb63ffa17c5 Mon Sep 17 00:00:00 2001 From: Eugene Shalygin Date: Wed, 16 Feb 2022 20:19:58 +0100 Subject: [PATCH 0760/1586] hwmon: (asus-ec-sensors) merge setup functions Merge configure_sensor_setup() into probe(). Changes: - v2: add local struct device *dev = &pdev->dev; - v3: initialize dev at declaration - v4: fix checkpatch warning - v5: fix formatting - v6: code style fixes Signed-off-by: Eugene Shalygin Signed-off-by: Guenter Roeck --- drivers/hwmon/asus-ec-sensors.c | 41 +++++++++++++-------------------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c index d2b84578d2af2..0701ade162270 100644 --- a/drivers/hwmon/asus-ec-sensors.c +++ b/drivers/hwmon/asus-ec-sensors.c @@ -611,23 +611,31 @@ get_board_sensors(const struct device *dev) return (unsigned long)dmi_entry->driver_data; } -static int __init configure_sensor_setup(struct device *dev) +static int __init asus_ec_probe(struct platform_device *pdev) { - struct ec_sensors_data *ec_data = dev_get_drvdata(dev); + const struct hwmon_channel_info **ptr_asus_ec_ci; int nr_count[hwmon_max] = { 0 }, nr_types = 0; - struct device *hwdev; struct hwmon_channel_info *asus_ec_hwmon_chan; - const struct hwmon_channel_info **ptr_asus_ec_ci; const struct hwmon_chip_info *chip_info; + struct device *dev = &pdev->dev; + struct ec_sensors_data *ec_data; const struct ec_sensor_info *si; enum hwmon_sensor_types type; + unsigned long board_sensors; + struct device *hwdev; unsigned int i; - ec_data->board_sensors = get_board_sensors(dev); - if (!ec_data->board_sensors) { + board_sensors = get_board_sensors(dev); + if (!board_sensors) return -ENODEV; - } + ec_data = devm_kzalloc(dev, sizeof(struct ec_sensors_data), + GFP_KERNEL); + if (!ec_data) + return -ENOMEM; + + dev_set_drvdata(dev, ec_data); + ec_data->board_sensors = board_sensors; ec_data->nr_sensors = 
board_sensors_count(ec_data->board_sensors); ec_data->sensors = devm_kcalloc(dev, ec_data->nr_sensors, sizeof(struct ec_sensor), GFP_KERNEL); @@ -638,9 +646,8 @@ static int __init configure_sensor_setup(struct device *dev) ec_data->read_buffer = devm_kcalloc(dev, ec_data->nr_registers, sizeof(u8), GFP_KERNEL); - if (!ec_data->registers || !ec_data->read_buffer) { + if (!ec_data->registers || !ec_data->read_buffer) return -ENOMEM; - } fill_ec_registers(ec_data); @@ -688,22 +695,6 @@ static int __init configure_sensor_setup(struct device *dev) return PTR_ERR_OR_ZERO(hwdev); } -static int __init asus_ec_probe(struct platform_device *pdev) -{ - struct ec_sensors_data *state; - int status = 0; - - state = devm_kzalloc(&pdev->dev, sizeof(struct ec_sensors_data), - GFP_KERNEL); - - if (!state) { - return -ENOMEM; - } - - dev_set_drvdata(&pdev->dev, state); - status = configure_sensor_setup(&pdev->dev); - return status; -} static const struct acpi_device_id acpi_ec_ids[] = { /* Embedded Controller Device */ -- GitLab From 034dadfbd329231fc771984d4897c71f73a4434d Mon Sep 17 00:00:00 2001 From: Eugene Shalygin Date: Thu, 17 Feb 2022 08:32:38 +0100 Subject: [PATCH 0761/1586] hwmon: (asus-ec-sensors) depend on X86 in KConfig All the supported mainboards are for the X86 platform Signed-off-by: Eugene Shalygin Link: https://lore.kernel.org/r/20220217073238.2479005-1-eugene.shalygin@gmail.com Signed-off-by: Guenter Roeck --- drivers/hwmon/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index ce9149e0648f9..b3597ba66ddb9 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -2284,6 +2284,7 @@ config SENSORS_ASUS_WMI_EC config SENSORS_ASUS_EC tristate "ASUS EC Sensors" + depends on X86 help If you say yes here you get support for the ACPI embedded controller hardware monitoring interface found in ASUS motherboards. 
The driver -- GitLab From e0f0307ac16fe69c6f9dcc8d14b532bc608fc989 Mon Sep 17 00:00:00 2001 From: Zev Weiss Date: Fri, 18 Feb 2022 16:03:59 -0800 Subject: [PATCH 0762/1586] hwmon: (pmbus) Add get_error_flags support to regulator ops The various PMBus status bits don't all map perfectly to the more limited set of REGULATOR_ERROR_* flags, but there's a reasonable number where they correspond well enough. Signed-off-by: Zev Weiss Link: https://lore.kernel.org/r/20220219000359.19985-1-zev@bewilderbeest.net [groeck: Added missing locking] Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/pmbus_core.c | 114 +++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index b1386a4df4cc4..8eed7968a50ef 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -2432,10 +2432,124 @@ static int pmbus_regulator_disable(struct regulator_dev *rdev) return _pmbus_regulator_on_off(rdev, 0); } +/* A PMBus status flag and the corresponding REGULATOR_ERROR_* flag */ +struct pmbus_regulator_status_assoc { + int pflag, rflag; +}; + +/* PMBus->regulator bit mappings for a PMBus status register */ +struct pmbus_regulator_status_category { + int func; + int reg; + const struct pmbus_regulator_status_assoc *bits; /* zero-terminated */ +}; + +static const struct pmbus_regulator_status_category pmbus_regulator_flag_map[] = { + { + .func = PMBUS_HAVE_STATUS_VOUT, + .reg = PMBUS_STATUS_VOUT, + .bits = (const struct pmbus_regulator_status_assoc[]) { + { PB_VOLTAGE_UV_WARNING, REGULATOR_ERROR_UNDER_VOLTAGE_WARN }, + { PB_VOLTAGE_UV_FAULT, REGULATOR_ERROR_UNDER_VOLTAGE }, + { PB_VOLTAGE_OV_WARNING, REGULATOR_ERROR_OVER_VOLTAGE_WARN }, + { PB_VOLTAGE_OV_FAULT, REGULATOR_ERROR_REGULATION_OUT }, + { }, + }, + }, { + .func = PMBUS_HAVE_STATUS_IOUT, + .reg = PMBUS_STATUS_IOUT, + .bits = (const struct pmbus_regulator_status_assoc[]) { + { PB_IOUT_OC_WARNING, 
REGULATOR_ERROR_OVER_CURRENT_WARN }, + { PB_IOUT_OC_FAULT, REGULATOR_ERROR_OVER_CURRENT }, + { PB_IOUT_OC_LV_FAULT, REGULATOR_ERROR_OVER_CURRENT }, + { }, + }, + }, { + .func = PMBUS_HAVE_STATUS_TEMP, + .reg = PMBUS_STATUS_TEMPERATURE, + .bits = (const struct pmbus_regulator_status_assoc[]) { + { PB_TEMP_OT_WARNING, REGULATOR_ERROR_OVER_TEMP_WARN }, + { PB_TEMP_OT_FAULT, REGULATOR_ERROR_OVER_TEMP }, + { }, + }, + }, +}; + +static int pmbus_regulator_get_error_flags(struct regulator_dev *rdev, unsigned int *flags) +{ + int i, status; + const struct pmbus_regulator_status_category *cat; + const struct pmbus_regulator_status_assoc *bit; + struct device *dev = rdev_get_dev(rdev); + struct i2c_client *client = to_i2c_client(dev->parent); + struct pmbus_data *data = i2c_get_clientdata(client); + u8 page = rdev_get_id(rdev); + int func = data->info->func[page]; + + *flags = 0; + + mutex_lock(&data->update_lock); + + for (i = 0; i < ARRAY_SIZE(pmbus_regulator_flag_map); i++) { + cat = &pmbus_regulator_flag_map[i]; + if (!(func & cat->func)) + continue; + + status = pmbus_read_byte_data(client, page, cat->reg); + if (status < 0) { + mutex_unlock(&data->update_lock); + return status; + } + + for (bit = cat->bits; bit->pflag; bit++) { + if (status & bit->pflag) + *flags |= bit->rflag; + } + } + + /* + * Map what bits of STATUS_{WORD,BYTE} we can to REGULATOR_ERROR_* + * bits. Some of the other bits are tempting (especially for cases + * where we don't have the relevant PMBUS_HAVE_STATUS_* + * functionality), but there's an unfortunate ambiguity in that + * they're defined as indicating a fault *or* a warning, so we can't + * easily determine whether to report REGULATOR_ERROR_ or + * REGULATOR_ERROR__WARN. 
+ */ + status = pmbus_get_status(client, page, PMBUS_STATUS_WORD); + mutex_unlock(&data->update_lock); + if (status < 0) + return status; + + if (pmbus_regulator_is_enabled(rdev) && (status & PB_STATUS_OFF)) + *flags |= REGULATOR_ERROR_FAIL; + + /* + * Unlike most other status bits, PB_STATUS_{IOUT_OC,VOUT_OV} are + * defined strictly as fault indicators (not warnings). + */ + if (status & PB_STATUS_IOUT_OC) + *flags |= REGULATOR_ERROR_OVER_CURRENT; + if (status & PB_STATUS_VOUT_OV) + *flags |= REGULATOR_ERROR_REGULATION_OUT; + + /* + * If we haven't discovered any thermal faults or warnings via + * PMBUS_STATUS_TEMPERATURE, map PB_STATUS_TEMPERATURE to a warning as + * a (conservative) best-effort interpretation. + */ + if (!(*flags & (REGULATOR_ERROR_OVER_TEMP | REGULATOR_ERROR_OVER_TEMP_WARN)) && + (status & PB_STATUS_TEMPERATURE)) + *flags |= REGULATOR_ERROR_OVER_TEMP_WARN; + + return 0; +} + const struct regulator_ops pmbus_regulator_ops = { .enable = pmbus_regulator_enable, .disable = pmbus_regulator_disable, .is_enabled = pmbus_regulator_is_enabled, + .get_error_flags = pmbus_regulator_get_error_flags, }; EXPORT_SYMBOL_NS_GPL(pmbus_regulator_ops, PMBUS); -- GitLab From da78ad2b6b9b7308c5835428442fef8d73cea2e3 Mon Sep 17 00:00:00 2001 From: Zev Weiss Date: Fri, 18 Feb 2022 16:07:42 -0800 Subject: [PATCH 0763/1586] hwmon: (pmbus/lm25066) Add regulator support While these chips aren't strictly advertised as voltage regulators per se, they (aside from the lm25056) support the PMBus OPERATION command to enable and disable their outputs and have status bits for reporting various warnings and faults, and can hence usefully support all the pmbus_regulator_ops operations. 
Signed-off-by: Zev Weiss Link: https://lore.kernel.org/r/20220219000742.20126-1-zev@bewilderbeest.net Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/Kconfig | 7 +++++++ drivers/hwmon/pmbus/lm25066.c | 14 ++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/drivers/hwmon/pmbus/Kconfig b/drivers/hwmon/pmbus/Kconfig index c96f7b7338bd7..c73aa50c76157 100644 --- a/drivers/hwmon/pmbus/Kconfig +++ b/drivers/hwmon/pmbus/Kconfig @@ -174,6 +174,13 @@ config SENSORS_LM25066 This driver can also be built as a module. If so, the module will be called lm25066. +config SENSORS_LM25066_REGULATOR + bool "Regulator support for LM25066 and compatibles" + depends on SENSORS_LM25066 && REGULATOR + help + If you say yes here you get regulator support for National + Semiconductor LM25066, LM5064, and LM5066. + config SENSORS_LTC2978 tristate "Linear Technologies LTC2978 and compatibles" help diff --git a/drivers/hwmon/pmbus/lm25066.c b/drivers/hwmon/pmbus/lm25066.c index 8402b41520eb3..09792cd03d9fd 100644 --- a/drivers/hwmon/pmbus/lm25066.c +++ b/drivers/hwmon/pmbus/lm25066.c @@ -435,6 +435,12 @@ static int lm25066_write_word_data(struct i2c_client *client, int page, int reg, return ret; } +#if IS_ENABLED(CONFIG_SENSORS_LM25066_REGULATOR) +static const struct regulator_desc lm25066_reg_desc[] = { + PMBUS_REGULATOR("vout", 0), +}; +#endif + static const struct i2c_device_id lm25066_id[] = { {"lm25056", lm25056}, {"lm25066", lm25066}, @@ -545,6 +551,14 @@ static int lm25066_probe(struct i2c_client *client) info->m[PSC_CURRENT_IN] = info->m[PSC_CURRENT_IN] * shunt / 1000; info->m[PSC_POWER] = info->m[PSC_POWER] * shunt / 1000; +#if IS_ENABLED(CONFIG_SENSORS_LM25066_REGULATOR) + /* LM25056 doesn't support OPERATION */ + if (data->id != lm25056) { + info->num_regulators = ARRAY_SIZE(lm25066_reg_desc); + info->reg_desc = lm25066_reg_desc; + } +#endif + return pmbus_do_probe(client, info); } -- GitLab From 567e77a2c213f0103912378891f25442b665cbfb Mon Sep 17 00:00:00 2001 From: 
Eugene Shalygin Date: Thu, 17 Feb 2022 20:43:18 +0100 Subject: [PATCH 0764/1586] hwmon: (asus-ec-sensors) do not print from .probe() Remove the call to dev_info() from the board detection function, which is called from probe(), not only to be in line with hwmon driver rules, but also because the message duplicates the error code returned from probe() for that case (ENODEV). Changes in: - v2: add missing newline (style). Signed-off-by: Eugene Shalygin Link: https://lore.kernel.org/r/20220217194318.2960472-1-eugene.shalygin@gmail.com Signed-off-by: Guenter Roeck --- drivers/hwmon/asus-ec-sensors.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c index 0701ade162270..b5cf0136360cd 100644 --- a/drivers/hwmon/asus-ec-sensors.c +++ b/drivers/hwmon/asus-ec-sensors.c @@ -597,18 +597,12 @@ static struct hwmon_chip_info asus_ec_chip_info = { .ops = &asus_ec_hwmon_ops, }; -static unsigned long __init -get_board_sensors(const struct device *dev) +static unsigned long __init get_board_sensors(void) { - const struct dmi_system_id *dmi_entry; + const struct dmi_system_id *dmi_entry = + dmi_first_match(asus_ec_dmi_table); - dmi_entry = dmi_first_match(asus_ec_dmi_table); - if (!dmi_entry) { - dev_info(dev, "Unsupported board"); - return 0; - } - - return (unsigned long)dmi_entry->driver_data; + return dmi_entry ? 
(unsigned long)dmi_entry->driver_data : 0; } static int __init asus_ec_probe(struct platform_device *pdev) @@ -625,7 +619,7 @@ static int __init asus_ec_probe(struct platform_device *pdev) struct device *hwdev; unsigned int i; - board_sensors = get_board_sensors(dev); + board_sensors = get_board_sensors(); if (!board_sensors) return -ENODEV; -- GitLab From 6109c3e1905c3f8d0a3909c5f6a9ad5186822b2b Mon Sep 17 00:00:00 2001 From: Eddie James Date: Tue, 15 Feb 2022 09:10:19 -0600 Subject: [PATCH 0765/1586] hwmon: (occ) Add sysfs entry for IPS (Idle Power Saver) status BMC control applications need to check the Idle Power Saver status byte returned by the OCC poll response, so export it in sysfs with the other OCC-specific data. Signed-off-by: Eddie James Reviewed-by: Joel Stanley Link: https://lore.kernel.org/r/20220215151022.7498-2-eajames@linux.ibm.com Signed-off-by: Guenter Roeck --- drivers/hwmon/occ/common.h | 1 + drivers/hwmon/occ/sysfs.c | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/drivers/hwmon/occ/common.h b/drivers/hwmon/occ/common.h index 5020117be740f..a88c66d36e38a 100644 --- a/drivers/hwmon/occ/common.h +++ b/drivers/hwmon/occ/common.h @@ -119,6 +119,7 @@ struct occ { u8 prev_stat; u8 prev_ext_stat; u8 prev_occs_present; + u8 prev_ips_status; }; int occ_setup(struct occ *occ, const char *name); diff --git a/drivers/hwmon/occ/sysfs.c b/drivers/hwmon/occ/sysfs.c index 03b16abef67fd..6dc69c9aa4c29 100644 --- a/drivers/hwmon/occ/sysfs.c +++ b/drivers/hwmon/occ/sysfs.c @@ -63,6 +63,9 @@ static ssize_t occ_sysfs_show(struct device *dev, else val = 1; break; + case 8: + val = header->ips_status; + break; default: return -EINVAL; } @@ -88,6 +91,7 @@ static SENSOR_DEVICE_ATTR(occ_mem_throttle, 0444, occ_sysfs_show, NULL, 4); static SENSOR_DEVICE_ATTR(occ_quick_pwr_drop, 0444, occ_sysfs_show, NULL, 5); static SENSOR_DEVICE_ATTR(occ_state, 0444, occ_sysfs_show, NULL, 6); static SENSOR_DEVICE_ATTR(occs_present, 0444, occ_sysfs_show, NULL, 7); 
+static SENSOR_DEVICE_ATTR(occ_ips_status, 0444, occ_sysfs_show, NULL, 8); static DEVICE_ATTR_RO(occ_error); static struct attribute *occ_attributes[] = { @@ -99,6 +103,7 @@ static struct attribute *occ_attributes[] = { &sensor_dev_attr_occ_quick_pwr_drop.dev_attr.attr, &sensor_dev_attr_occ_state.dev_attr.attr, &sensor_dev_attr_occs_present.dev_attr.attr, + &sensor_dev_attr_occ_ips_status.dev_attr.attr, &dev_attr_occ_error.attr, NULL }; @@ -162,6 +167,11 @@ void occ_sysfs_poll_done(struct occ *occ) sysfs_notify(&occ->bus_dev->kobj, NULL, name); } + if (header->ips_status != occ->prev_ips_status) { + name = sensor_dev_attr_occ_ips_status.dev_attr.attr.name; + sysfs_notify(&occ->bus_dev->kobj, NULL, name); + } + if (occ->error && occ->error != occ->prev_error) { name = dev_attr_occ_error.attr.name; sysfs_notify(&occ->bus_dev->kobj, NULL, name); @@ -174,6 +184,7 @@ done: occ->prev_stat = header->status; occ->prev_ext_stat = header->ext_status; occ->prev_occs_present = header->occs_present; + occ->prev_ips_status = header->ips_status; } int occ_setup_sysfs(struct occ *occ) -- GitLab From a25126fc32890a2a03ba5ddd1189aa8a4bcf4591 Mon Sep 17 00:00:00 2001 From: Eddie James Date: Tue, 15 Feb 2022 09:10:20 -0600 Subject: [PATCH 0766/1586] hwmon: (occ) Add sysfs entry for OCC mode BMC control applications need to check the OCC mode returned by the OCC poll response, so export it in sysfs with the other OCC-specific data. 
Signed-off-by: Eddie James Reviewed-by: Joel Stanley Link: https://lore.kernel.org/r/20220215151022.7498-3-eajames@linux.ibm.com Signed-off-by: Guenter Roeck --- drivers/hwmon/occ/common.h | 1 + drivers/hwmon/occ/sysfs.c | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/drivers/hwmon/occ/common.h b/drivers/hwmon/occ/common.h index a88c66d36e38a..2dd4a4d240c0f 100644 --- a/drivers/hwmon/occ/common.h +++ b/drivers/hwmon/occ/common.h @@ -120,6 +120,7 @@ struct occ { u8 prev_ext_stat; u8 prev_occs_present; u8 prev_ips_status; + u8 prev_mode; }; int occ_setup(struct occ *occ, const char *name); diff --git a/drivers/hwmon/occ/sysfs.c b/drivers/hwmon/occ/sysfs.c index 6dc69c9aa4c29..88f655887c958 100644 --- a/drivers/hwmon/occ/sysfs.c +++ b/drivers/hwmon/occ/sysfs.c @@ -66,6 +66,9 @@ static ssize_t occ_sysfs_show(struct device *dev, case 8: val = header->ips_status; break; + case 9: + val = header->mode; + break; default: return -EINVAL; } @@ -92,6 +95,7 @@ static SENSOR_DEVICE_ATTR(occ_quick_pwr_drop, 0444, occ_sysfs_show, NULL, 5); static SENSOR_DEVICE_ATTR(occ_state, 0444, occ_sysfs_show, NULL, 6); static SENSOR_DEVICE_ATTR(occs_present, 0444, occ_sysfs_show, NULL, 7); static SENSOR_DEVICE_ATTR(occ_ips_status, 0444, occ_sysfs_show, NULL, 8); +static SENSOR_DEVICE_ATTR(occ_mode, 0444, occ_sysfs_show, NULL, 9); static DEVICE_ATTR_RO(occ_error); static struct attribute *occ_attributes[] = { @@ -104,6 +108,7 @@ static struct attribute *occ_attributes[] = { &sensor_dev_attr_occ_state.dev_attr.attr, &sensor_dev_attr_occs_present.dev_attr.attr, &sensor_dev_attr_occ_ips_status.dev_attr.attr, + &sensor_dev_attr_occ_mode.dev_attr.attr, &dev_attr_occ_error.attr, NULL }; @@ -172,6 +177,11 @@ void occ_sysfs_poll_done(struct occ *occ) sysfs_notify(&occ->bus_dev->kobj, NULL, name); } + if (header->mode != occ->prev_mode) { + name = sensor_dev_attr_occ_mode.dev_attr.attr.name; + sysfs_notify(&occ->bus_dev->kobj, NULL, name); + } + if (occ->error && occ->error != 
occ->prev_error) { name = dev_attr_occ_error.attr.name; sysfs_notify(&occ->bus_dev->kobj, NULL, name); @@ -185,6 +195,7 @@ done: occ->prev_ext_stat = header->ext_status; occ->prev_occs_present = header->occs_present; occ->prev_ips_status = header->ips_status; + occ->prev_mode = header->mode; } int occ_setup_sysfs(struct occ *occ) -- GitLab From a03d8969887325167d5fe47ead99d280bbbc6b06 Mon Sep 17 00:00:00 2001 From: Eddie James Date: Tue, 15 Feb 2022 09:10:21 -0600 Subject: [PATCH 0767/1586] hwmon: (occ) Add sysfs entries for additional extended status bits Add sysfs entries for DVFS due to a VRM Vdd over-temperature condition, and add the GPU throttling condition bits (such that if bit 1 is set, GPU1 is throttling). Signed-off-by: Eddie James Reviewed-by: Joel Stanley Link: https://lore.kernel.org/r/20220215151022.7498-4-eajames@linux.ibm.com Signed-off-by: Guenter Roeck --- drivers/hwmon/occ/sysfs.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/hwmon/occ/sysfs.c b/drivers/hwmon/occ/sysfs.c index 88f655887c958..b2f788a777469 100644 --- a/drivers/hwmon/occ/sysfs.c +++ b/drivers/hwmon/occ/sysfs.c @@ -19,6 +19,8 @@ #define OCC_EXT_STAT_DVFS_POWER BIT(6) #define OCC_EXT_STAT_MEM_THROTTLE BIT(5) #define OCC_EXT_STAT_QUICK_DROP BIT(4) +#define OCC_EXT_STAT_DVFS_VDD BIT(3) +#define OCC_EXT_STAT_GPU_THROTTLE GENMASK(2, 0) static ssize_t occ_sysfs_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -69,6 +71,12 @@ static ssize_t occ_sysfs_show(struct device *dev, case 9: val = header->mode; break; + case 10: + val = !!(header->ext_status & OCC_EXT_STAT_DVFS_VDD); + break; + case 11: + val = header->ext_status & OCC_EXT_STAT_GPU_THROTTLE; + break; default: return -EINVAL; } @@ -96,6 +104,8 @@ static SENSOR_DEVICE_ATTR(occ_state, 0444, occ_sysfs_show, NULL, 6); static SENSOR_DEVICE_ATTR(occs_present, 0444, occ_sysfs_show, NULL, 7); static SENSOR_DEVICE_ATTR(occ_ips_status, 0444, occ_sysfs_show, NULL, 8); static 
SENSOR_DEVICE_ATTR(occ_mode, 0444, occ_sysfs_show, NULL, 9); +static SENSOR_DEVICE_ATTR(occ_dvfs_vdd, 0444, occ_sysfs_show, NULL, 10); +static SENSOR_DEVICE_ATTR(occ_gpu_throttle, 0444, occ_sysfs_show, NULL, 11); static DEVICE_ATTR_RO(occ_error); static struct attribute *occ_attributes[] = { @@ -109,6 +119,8 @@ static struct attribute *occ_attributes[] = { &sensor_dev_attr_occs_present.dev_attr.attr, &sensor_dev_attr_occ_ips_status.dev_attr.attr, &sensor_dev_attr_occ_mode.dev_attr.attr, + &sensor_dev_attr_occ_dvfs_vdd.dev_attr.attr, + &sensor_dev_attr_occ_gpu_throttle.dev_attr.attr, &dev_attr_occ_error.attr, NULL }; @@ -166,6 +178,18 @@ void occ_sysfs_poll_done(struct occ *occ) sysfs_notify(&occ->bus_dev->kobj, NULL, name); } + if ((header->ext_status & OCC_EXT_STAT_DVFS_VDD) != + (occ->prev_ext_stat & OCC_EXT_STAT_DVFS_VDD)) { + name = sensor_dev_attr_occ_dvfs_vdd.dev_attr.attr.name; + sysfs_notify(&occ->bus_dev->kobj, NULL, name); + } + + if ((header->ext_status & OCC_EXT_STAT_GPU_THROTTLE) != + (occ->prev_ext_stat & OCC_EXT_STAT_GPU_THROTTLE)) { + name = sensor_dev_attr_occ_gpu_throttle.dev_attr.attr.name; + sysfs_notify(&occ->bus_dev->kobj, NULL, name); + } + if ((header->status & OCC_STAT_MASTER) && header->occs_present != occ->prev_occs_present) { name = sensor_dev_attr_occs_present.dev_attr.attr.name; -- GitLab From 7cd682b027ee1ed8bcf1e79e208c2b6b1a4d31b7 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Tue, 15 Feb 2022 20:11:13 +0100 Subject: [PATCH 0768/1586] hwmon: (dell-smm) Reword and mark parameter "force" as unsafe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When enabling said module parameter, the driver ignores all feature blacklists on relevant models, which has the potential for strange side effects. Also there seems to be a slight chance for unsupported devices to behave badly when probed for features. 
In such cases, the kernel should be tainted to inform people that these issues might have been caused by the dell_smm_hwmon driver with "force" enabled. Also reword the parameter description to remind users that enabling "force" also enables blacklisted features. Tested on a Dell Inspiron 3505. Signed-off-by: Armin Wolf Reviewed-by: Pali Rohár Link: https://lore.kernel.org/r/20220215191113.16640-8-W_Armin@gmx.de Signed-off-by: Guenter Roeck --- drivers/hwmon/dell-smm-hwmon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c index 9949eeb79378e..1fd672610aeba 100644 --- a/drivers/hwmon/dell-smm-hwmon.c +++ b/drivers/hwmon/dell-smm-hwmon.c @@ -86,8 +86,8 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS("i8k"); static bool force; -module_param(force, bool, 0); -MODULE_PARM_DESC(force, "Force loading without checking for supported models"); +module_param_unsafe(force, bool, 0); +MODULE_PARM_DESC(force, "Force loading without checking for supported models and features"); static bool ignore_dmi; module_param(ignore_dmi, bool, 0); -- GitLab From ec3db1ecf63e02265b4e93ddb1ba5cc6858c3972 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Tue, 15 Feb 2022 20:11:12 +0100 Subject: [PATCH 0769/1586] hwmon: (dell-smm) Add SMM interface documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document the SMM interface as requested by Pali Rohar. Since Dell does not offer any official documentation regarding the SMM interface, the necessary information was extracted from the dell_smm_hwmon driver and other sources.
Suggested-by: Pali Rohár Signed-off-by: Armin Wolf Reviewed-by: Pali Rohár Link: https://lore.kernel.org/r/20220215191113.16640-7-W_Armin@gmx.de Signed-off-by: Guenter Roeck --- Documentation/hwmon/dell-smm-hwmon.rst | 180 +++++++++++++++++++++++++ 1 file changed, 180 insertions(+) diff --git a/Documentation/hwmon/dell-smm-hwmon.rst b/Documentation/hwmon/dell-smm-hwmon.rst index beec884911715..d3323a96665d6 100644 --- a/Documentation/hwmon/dell-smm-hwmon.rst +++ b/Documentation/hwmon/dell-smm-hwmon.rst @@ -165,3 +165,183 @@ obtain the same information and to control the fan status. The ioctl interface can be accessed from C programs or from shell using the i8kctl utility. See the source file of ``i8kutils`` for more information on how to use the ioctl interface. + +SMM Interface +------------- + +.. warning:: The SMM interface was reverse-engineered by trial-and-error + since Dell did not provide any documentation, + please keep that in mind. + +The driver uses the SMM interface to send commands to the system BIOS. +This interface is normally used by Dell's 32-bit diagnostic program or +on newer notebook models by the built-in BIOS diagnostics. +The SMM is triggered by writing to the special ioports ``0xb2`` and ``0x84``, +and may cause short hangs when the BIOS code is taking too long to +execute. + +The SMM handler inside the system BIOS looks at the contents of the +``eax``, ``ebx``, ``ecx``, ``edx``, ``esi`` and ``edi`` registers. +Each register has a special purpose: + +=============== ================================== +Register Purpose +=============== ================================== +eax Holds the command code before SMM, + holds the first result after SMM. +ebx Holds the arguments. +ecx Unknown, set to 0. +edx Holds the second result after SMM. +esi Unknown, set to 0. +edi Unknown, set to 0.
+=============== ================================== + +The SMM handler can signal a failure by either: + +- setting the lower sixteen bits of ``eax`` to ``0xffff`` +- not modifying ``eax`` at all +- setting the carry flag + +SMM command codes +----------------- + +=============== ======================= ================================================ +Command Code Command Name Description +=============== ======================= ================================================ +``0x0025`` Get Fn key status Returns the Fn key pressed after SMM: + + - 9th bit in ``eax`` indicates Volume up + - 10th bit in ``eax`` indicates Volume down + - both bits indicate Volume mute + +``0xa069`` Get power status Returns current power status after SMM: + + - 1st bit in ``eax`` indicates Battery connected + - 3rd bit in ``eax`` indicates AC connected + +``0x00a3`` Get fan state Returns current fan state after SMM: + + - 1st byte in ``eax`` holds the current + fan state (0 - 2 or 3) + +``0x01a3`` Set fan state Sets the fan speed: + + - 1st byte in ``ebx`` holds the fan number + - 2nd byte in ``ebx`` holds the desired + fan state (0 - 2 or 3) + +``0x02a3`` Get fan speed Returns the current fan speed in RPM: + + - 1st byte in ``ebx`` holds the fan number + - 1st word in ``eax`` holds the current + fan speed in RPM (after SMM) + +``0x03a3`` Get fan type Returns the fan type: + + - 1st byte in ``ebx`` holds the fan number + - 1st byte in ``eax`` holds the + fan type (after SMM): + + - 5th bit indicates docking fan + - 1 indicates Processor fan + - 2 indicates Motherboard fan + - 3 indicates Video fan + - 4 indicates Power supply fan + - 5 indicates Chipset fan + - 6 indicates other fan type + +``0x04a3`` Get nominal fan speed Returns the nominal RPM in each fan state: + + - 1st byte in ``ebx`` holds the fan number + - 2nd byte in ``ebx`` holds the fan state + in question (0 - 2 or 3) + - 1st word in ``eax`` holds the nominal + fan speed in RPM (after SMM) + +``0x05a3`` Get fan speed
tolerance Returns the speed tolerance for each fan state: + + - 1st byte in ``ebx`` holds the fan number + - 2nd byte in ``ebx`` holds the fan state + in question (0 - 2 or 3) + - 1st byte in ``eax`` returns the speed + tolerance + +``0x10a3`` Get sensor temperature Returns the measured temperature: + + - 1st byte in ``ebx`` holds the sensor number + - 1st byte in ``eax`` holds the measured + temperature (after SMM) + +``0x11a3`` Get sensor type Returns the sensor type: + + - 1st byte in ``ebx`` holds the sensor number + - 1st byte in ``eax`` holds the + temperature type (after SMM): + + - 1 indicates CPU sensor + - 2 indicates GPU sensor + - 3 indicates SODIMM sensor + - 4 indicates other sensor type + - 5 indicates Ambient sensor + - 6 indicates other sensor type + +``0xfea3`` Get SMM signature Returns Dell signature if interface + is supported (after SMM): + + - ``eax`` holds 1145651527 + (0x44494147 or "DIAG") + - ``edx`` holds 1145392204 + (0x44454c4c or "DELL") + +``0xffa3`` Get SMM signature Same as ``0xfea3``, check both. +=============== ======================= ================================================ + +There are additional commands for enabling (``0x31a3`` or ``0x35a3``) and +disabling (``0x30a3`` or ``0x34a3``) automatic fan speed control. +The commands are however causing severe side effects on many machines, so +they are not used by default. + +On several machines (Inspiron 3505, Precision 490, Vostro 1720, ...), the +fans support a 4th "magic" state, which signals the BIOS that automatic +fan control should be enabled for a specific fan. +However there are also some machines which do support a 4th regular fan state too, +but in case of the "magic" state, the nominal RPM reported for this state is a +placeholder value, which however is not always detectable.
+ +Firmware Bugs +------------- + +The SMM calls can behave erratically on some machines: + +======================================================= ================= +Firmware Bug Affected Machines +======================================================= ================= +Reading of fan states returns spurious errors. Precision 490 + +Reading of fan types causes erratic fan behaviour. Studio XPS 8000 + + Studio XPS 8100 + + Inspiron 580 + +Fan-related SMM calls take too long (about 500ms). Inspiron 7720 + + Vostro 3360 + + XPS 13 9333 + + XPS 15 L502X +======================================================= ================= + +In case you experience similar issues on your Dell machine, please +submit a bug report on bugzilla so we can apply workarounds. + +Limitations +----------- + +The SMM calls can take too long to execute on some machines, causing +short hangs and/or audio glitches. +Also the fan state needs to be restored after suspend, as well as +the automatic mode settings. +When reading a temperature sensor, values above 127 degrees indicate +a BIOS read error or a deactivated sensor. -- GitLab From 4d9983dee5d4839008f7d1a75e866bccaa5877e8 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Tue, 15 Feb 2022 20:11:09 +0100 Subject: [PATCH 0770/1586] hwmon: (dell-smm) Make fan/temp sensor number a u8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Right now, we only use bits 0 to 7 of the fan/temp sensor number by doing number & 0xff. Passing the value as a u8 makes this step unnecessary. Also add checks to the ioctl handler since users might get confused when passing 0x00000101 does the same as passing 0x00000001. Tested on a Dell Inspiron 3505.
Signed-off-by: Armin Wolf Reviewed-by: Pali Rohár Link: https://lore.kernel.org/r/20220215191113.16640-4-W_Armin@gmx.de Signed-off-by: Guenter Roeck --- drivers/hwmon/dell-smm-hwmon.c | 68 ++++++++++++++++++++++------------ 1 file changed, 45 insertions(+), 23 deletions(-) diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c index 1fd672610aeba..1cde005ff4e46 100644 --- a/drivers/hwmon/dell-smm-hwmon.c +++ b/drivers/hwmon/dell-smm-hwmon.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -250,46 +251,52 @@ static int i8k_smm(struct smm_regs *regs) /* * Read the fan status. */ -static int i8k_get_fan_status(const struct dell_smm_data *data, int fan) +static int i8k_get_fan_status(const struct dell_smm_data *data, u8 fan) { - struct smm_regs regs = { .eax = I8K_SMM_GET_FAN, }; + struct smm_regs regs = { + .eax = I8K_SMM_GET_FAN, + .ebx = fan, + }; if (data->disallow_fan_support) return -EINVAL; - regs.ebx = fan & 0xff; return i8k_smm(&regs) ? : regs.eax & 0xff; } /* * Read the fan speed in RPM. */ -static int i8k_get_fan_speed(const struct dell_smm_data *data, int fan) +static int i8k_get_fan_speed(const struct dell_smm_data *data, u8 fan) { - struct smm_regs regs = { .eax = I8K_SMM_GET_SPEED, }; + struct smm_regs regs = { + .eax = I8K_SMM_GET_SPEED, + .ebx = fan, + }; if (data->disallow_fan_support) return -EINVAL; - regs.ebx = fan & 0xff; return i8k_smm(&regs) ? : (regs.eax & 0xffff) * data->i8k_fan_mult; } /* * Read the fan type. */ -static int _i8k_get_fan_type(const struct dell_smm_data *data, int fan) +static int _i8k_get_fan_type(const struct dell_smm_data *data, u8 fan) { - struct smm_regs regs = { .eax = I8K_SMM_GET_FAN_TYPE, }; + struct smm_regs regs = { + .eax = I8K_SMM_GET_FAN_TYPE, + .ebx = fan, + }; if (data->disallow_fan_support || data->disallow_fan_type_call) return -EINVAL; - regs.ebx = fan & 0xff; return i8k_smm(&regs) ?
: regs.eax & 0xff; } -static int i8k_get_fan_type(struct dell_smm_data *data, int fan) +static int i8k_get_fan_type(struct dell_smm_data *data, u8 fan) { /* I8K_SMM_GET_FAN_TYPE SMM call is expensive, so cache values */ if (data->fan_type[fan] == INT_MIN) @@ -301,14 +308,16 @@ static int i8k_get_fan_type(struct dell_smm_data *data, int fan) /* * Read the fan nominal rpm for specific fan speed. */ -static int __init i8k_get_fan_nominal_speed(const struct dell_smm_data *data, int fan, int speed) +static int __init i8k_get_fan_nominal_speed(const struct dell_smm_data *data, u8 fan, int speed) { - struct smm_regs regs = { .eax = I8K_SMM_GET_NOM_SPEED, }; + struct smm_regs regs = { + .eax = I8K_SMM_GET_NOM_SPEED, + .ebx = fan | (speed << 8), + }; if (data->disallow_fan_support) return -EINVAL; - regs.ebx = (fan & 0xff) | (speed << 8); return i8k_smm(&regs) ? : (regs.eax & 0xffff) * data->i8k_fan_mult; } @@ -329,7 +338,7 @@ static int i8k_enable_fan_auto_mode(const struct dell_smm_data *data, bool enabl /* * Set the fan speed (off, low, high, ...). */ -static int i8k_set_fan(const struct dell_smm_data *data, int fan, int speed) +static int i8k_set_fan(const struct dell_smm_data *data, u8 fan, int speed) { struct smm_regs regs = { .eax = I8K_SMM_SET_FAN, }; @@ -337,33 +346,35 @@ static int i8k_set_fan(const struct dell_smm_data *data, int fan, int speed) return -EINVAL; speed = (speed < 0) ? 0 : ((speed > data->i8k_fan_max) ? data->i8k_fan_max : speed); - regs.ebx = (fan & 0xff) | (speed << 8); + regs.ebx = fan | (speed << 8); return i8k_smm(&regs); } -static int __init i8k_get_temp_type(int sensor) +static int __init i8k_get_temp_type(u8 sensor) { - struct smm_regs regs = { .eax = I8K_SMM_GET_TEMP_TYPE, }; + struct smm_regs regs = { + .eax = I8K_SMM_GET_TEMP_TYPE, + .ebx = sensor, + }; - regs.ebx = sensor & 0xff; return i8k_smm(&regs) ? : regs.eax & 0xff; } /* * Read the cpu temperature.
*/ -static int _i8k_get_temp(int sensor) +static int _i8k_get_temp(u8 sensor) { struct smm_regs regs = { .eax = I8K_SMM_GET_TEMP, - .ebx = sensor & 0xff, + .ebx = sensor, }; return i8k_smm(&regs) ? : regs.eax & 0xff; } -static int i8k_get_temp(int sensor) +static int i8k_get_temp(u8 sensor) { int temp = _i8k_get_temp(sensor); @@ -496,6 +507,9 @@ static long i8k_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) if (copy_from_user(&val, argp, sizeof(int))) return -EFAULT; + if (val > U8_MAX || val < 0) + return -EINVAL; + val = i8k_get_fan_speed(data, val); break; @@ -503,6 +517,9 @@ static long i8k_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) if (copy_from_user(&val, argp, sizeof(int))) return -EFAULT; + if (val > U8_MAX || val < 0) + return -EINVAL; + val = i8k_get_fan_status(data, val); break; @@ -513,6 +530,9 @@ static long i8k_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) if (copy_from_user(&val, argp, sizeof(int))) return -EFAULT; + if (val > U8_MAX || val < 0) + return -EINVAL; + if (copy_from_user(&speed, argp + 1, sizeof(int))) return -EFAULT; @@ -920,7 +940,8 @@ static int __init dell_smm_init_hwmon(struct device *dev) { struct dell_smm_data *data = dev_get_drvdata(dev); struct device *dell_smm_hwmon_dev; - int i, state, err; + int state, err; + u8 i; for (i = 0; i < DELL_SMM_NO_TEMP; i++) { data->temp_type[i] = i8k_get_temp_type(i); @@ -1236,7 +1257,8 @@ static int __init dell_smm_probe(struct platform_device *pdev) { struct dell_smm_data *data; const struct dmi_system_id *id, *fan_control; - int fan, ret; + int ret; + u8 fan; data = devm_kzalloc(&pdev->dev, sizeof(struct dell_smm_data), GFP_KERNEL); if (!data) -- GitLab From c82fdd42fb50d281f6c7268e136178f096af6c69 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Tue, 15 Feb 2022 20:11:10 +0100 Subject: [PATCH 0771/1586] hwmon: (dell-smm) Improve temperature sensors detection On the Dell Inspiron 3505, three temperature sensors are available through the SMM interface.
However since they do not have an associated type, they are not detected. Probe for those sensors in case no type was detected. _i8k_get_temp() is used instead of i8k_get_temp() since it is sometimes faster and the result is easier to check (no -ENODATA) since we do not care about the actual temp value. Tested on a Dell Inspiron 3505. Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20220215191113.16640-5-W_Armin@gmx.de Signed-off-by: Guenter Roeck --- drivers/hwmon/dell-smm-hwmon.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c index 1cde005ff4e46..c5939e68586d0 100644 --- a/drivers/hwmon/dell-smm-hwmon.c +++ b/drivers/hwmon/dell-smm-hwmon.c @@ -651,6 +651,11 @@ static umode_t dell_smm_is_visible(const void *drvdata, enum hwmon_sensor_types case hwmon_temp: switch (attr) { case hwmon_temp_input: + /* _i8k_get_temp() is fine since we do not care about the actual value */ + if (data->temp_type[channel] >= 0 || _i8k_get_temp(channel) >= 0) + return 0444; + + break; case hwmon_temp_label: if (data->temp_type[channel] >= 0) return 0444; -- GitLab From 54cc3dbfc10dc3db7cb1cf49aee4477a8398fbde Mon Sep 17 00:00:00 2001 From: Marcello Sylvester Bauer Date: Mon, 21 Feb 2022 12:09:56 +0100 Subject: [PATCH 0772/1586] hwmon: (pmbus) Add regulator supply into macro Add regulator supply into PWBUS_REGULATOR macro. This makes it optional to define a vin-supply in DT. Not defining a supply will add a dummy regulator supply instead and only cause the following debug output: ``` Looking up vin-supply property in node [...] 
failed ``` Signed-off-by: Marcello Sylvester Bauer Link: https://lore.kernel.org/r/58f2ff7b90233fad3d7ae2e9d66d5192e2c1ac01.1645437439.git.sylv@sylv.io Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/pmbus.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwmon/pmbus/pmbus.h b/drivers/hwmon/pmbus/pmbus.h index e0aa8aa46d8c4..38f049d68d329 100644 --- a/drivers/hwmon/pmbus/pmbus.h +++ b/drivers/hwmon/pmbus/pmbus.h @@ -464,6 +464,7 @@ extern const struct regulator_ops pmbus_regulator_ops; #define PMBUS_REGULATOR(_name, _id) \ [_id] = { \ .name = (_name # _id), \ + .supply_name = "vin", \ .id = (_id), \ .of_match = of_match_ptr(_name # _id), \ .regulators_node = of_match_ptr("regulators"), \ -- GitLab From 84dc9e8a7eec2cdff00728baedf0fb35fc7c11e8 Mon Sep 17 00:00:00 2001 From: Eddie James Date: Tue, 15 Feb 2022 09:10:22 -0600 Subject: [PATCH 0773/1586] hwmon: (occ) Add soft minimum power cap attribute Export the power caps data for the soft minimum power cap through hwmon. Signed-off-by: Eddie James Reviewed-by: Joel Stanley Link: https://lore.kernel.org/r/20220215151022.7498-5-eajames@linux.ibm.com Signed-off-by: Guenter Roeck --- drivers/hwmon/occ/common.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/occ/common.c b/drivers/hwmon/occ/common.c index 0cb4a0a6cbc10..f00cd59f1d19f 100644 --- a/drivers/hwmon/occ/common.c +++ b/drivers/hwmon/occ/common.c @@ -674,6 +674,9 @@ static ssize_t occ_show_caps_3(struct device *dev, case 7: val = caps->user_source; break; + case 8: + val = get_unaligned_be16(&caps->soft_min) * 1000000ULL; + break; default: return -EINVAL; } @@ -835,12 +838,13 @@ static int occ_setup_sensor_attrs(struct occ *occ) case 1: num_attrs += (sensors->caps.num_sensors * 7); break; - case 3: - show_caps = occ_show_caps_3; - fallthrough; case 2: num_attrs += (sensors->caps.num_sensors * 8); break; + case 3: + show_caps = occ_show_caps_3; + num_attrs += (sensors->caps.num_sensors * 9); + 
break; default: sensors->caps.num_sensors = 0; } @@ -1047,6 +1051,15 @@ static int occ_setup_sensor_attrs(struct occ *occ) attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_caps, NULL, 7, 0); attr++; + + if (sensors->caps.version > 2) { + snprintf(attr->name, sizeof(attr->name), + "power%d_cap_min_soft", s); + attr->sensor = OCC_INIT_ATTR(attr->name, 0444, + show_caps, NULL, + 8, 0); + attr++; + } } } -- GitLab From 4b1dd41cf211c2c1b93cab77aebbb0a01157d3ff Mon Sep 17 00:00:00 2001 From: Marcello Sylvester Bauer Date: Mon, 21 Feb 2022 10:42:04 +0100 Subject: [PATCH 0774/1586] dt-bindings: vendor-prefixes: add Vicor Corporation Add vendor prefix for Vicor Corporation. Signed-off-by: Marcello Sylvester Bauer Acked-by: Rob Herring Link: https://lore.kernel.org/r/5b487a0b68d58b64022662f1e527ac80a6ad81ef.1645435888.git.sylv@sylv.io Signed-off-by: Guenter Roeck --- Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml index 294093d45a230..047a83a089cee 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.yaml +++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml @@ -1298,6 +1298,8 @@ patternProperties: description: Vertexcom Technologies, Inc. "^via,.*": description: VIA Technologies, Inc. + "^vicor,.*": + description: Vicor Corporation "^videostrong,.*": description: Videostrong Technology Co., Ltd. "^virtio,.*": -- GitLab From b7b94f15e55223e75b588863075a10ae79445db4 Mon Sep 17 00:00:00 2001 From: Marcello Sylvester Bauer Date: Mon, 21 Feb 2022 10:42:05 +0100 Subject: [PATCH 0775/1586] dt-bindings:trivial-devices: Add pli1209bc Add trivial device entry for PLI1209BC Digital Supervisor from Vicor Corporation. 
Signed-off-by: Marcello Sylvester Bauer Acked-by: Rob Herring Link: https://lore.kernel.org/r/15a9fcfb5b9592c6d87f12c2a4c77fd069f5cfff.1645435888.git.sylv@sylv.io Signed-off-by: Guenter Roeck --- Documentation/devicetree/bindings/trivial-devices.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/trivial-devices.yaml b/Documentation/devicetree/bindings/trivial-devices.yaml index 091792ba993e8..d03d90360aa09 100644 --- a/Documentation/devicetree/bindings/trivial-devices.yaml +++ b/Documentation/devicetree/bindings/trivial-devices.yaml @@ -354,6 +354,8 @@ properties: - ti,tps544c25 # Winbond/Nuvoton H/W Monitor - winbond,w83793 + # Vicor Corporation Digital Supervisor + - vicor,pli1209bc # i2c trusted platform module (TPM) - winbond,wpct301 -- GitLab From d0cd978513f2e37c353ec2dcfbd863f97edb1dcd Mon Sep 17 00:00:00 2001 From: Marcello Sylvester Bauer Date: Mon, 21 Feb 2022 10:42:06 +0100 Subject: [PATCH 0776/1586] hwmon: (pmbus) Add support for pli1209bc PLI1209BC is a Digital Supervisor from Vicor Corporation. 
Signed-off-by: Marcello Sylvester Bauer Link: https://lore.kernel.org/r/4e016e66275bc46c90974aec18b150c874e64787.1645435888.git.sylv@sylv.io Signed-off-by: Guenter Roeck --- Documentation/hwmon/index.rst | 1 + Documentation/hwmon/pli1209bc.rst | 75 +++++++++++++++++++ drivers/hwmon/pmbus/Kconfig | 9 +++ drivers/hwmon/pmbus/Makefile | 1 + drivers/hwmon/pmbus/pli1209bc.c | 115 ++++++++++++++++++++++++++++++ 5 files changed, 201 insertions(+) create mode 100644 Documentation/hwmon/pli1209bc.rst create mode 100644 drivers/hwmon/pmbus/pli1209bc.c diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst index b69fdaf1af82e..075d1defcd204 100644 --- a/Documentation/hwmon/index.rst +++ b/Documentation/hwmon/index.rst @@ -161,6 +161,7 @@ Hardware Monitoring Kernel Drivers pc87427 pcf8591 pim4328 + pli1209bc pm6764tr pmbus powr1220 diff --git a/Documentation/hwmon/pli1209bc.rst b/Documentation/hwmon/pli1209bc.rst new file mode 100644 index 0000000000000..ea5b3f68a5154 --- /dev/null +++ b/Documentation/hwmon/pli1209bc.rst @@ -0,0 +1,75 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Kernel driver pli1209bc +======================= + +Supported chips: + + * Digital Supervisor PLI1209BC + + Prefix: 'pli1209bc' + + Addresses scanned: 0x50 - 0x5F + + Datasheet: https://www.vicorpower.com/documents/datasheets/ds-PLI1209BCxyzz-VICOR.pdf + +Authors: + - Marcello Sylvester Bauer + +Description +----------- + +The Vicor PLI1209BC is an isolated digital power system supervisor that provides +a communication interface between a host processor and one Bus Converter Module +(BCM). The PLI communicates with a system controller via a PMBus compatible +interface over an isolated UART interface. Through the PLI, the host processor +can configure, set protection limits, and monitor the BCM. + +Sysfs entries +------------- + +======================= ======================================================== +in1_label "vin2" +in1_input Input voltage. 
+in1_rated_min Minimum rated input voltage. +in1_rated_max Maximum rated input voltage. +in1_max Maximum input voltage. +in1_max_alarm Input voltage high alarm. +in1_crit Critical input voltage. +in1_crit_alarm Input voltage critical alarm. + +in2_label "vout2" +in2_input Output voltage. +in2_rated_min Minimum rated output voltage. +in2_rated_max Maximum rated output voltage. +in2_alarm Output voltage alarm + +curr1_label "iin2" +curr1_input Input current. +curr1_max Maximum input current. +curr1_max_alarm Maximum input current high alarm. +curr1_crit Critical input current. +curr1_crit_alarm Input current critical alarm. + +curr2_label "iout2" +curr2_input Output current. +curr2_crit Critical output current. +curr2_crit_alarm Output current critical alarm. +curr2_max Maximum output current. +curr2_max_alarm Output current high alarm. + +power1_label "pin2" +power1_input Input power. +power1_alarm Input power alarm. + +power2_label "pout2" +power2_input Output power. +power2_rated_max Maximum rated output power. + +temp1_input Die temperature. +temp1_alarm Die temperature alarm. +temp1_max Maximum die temperature. +temp1_max_alarm Die temperature high alarm. +temp1_crit Critical die temperature. +temp1_crit_alarm Die temperature critical alarm. +======================= ======================================================== diff --git a/drivers/hwmon/pmbus/Kconfig b/drivers/hwmon/pmbus/Kconfig index c73aa50c76157..f18f67a946974 100644 --- a/drivers/hwmon/pmbus/Kconfig +++ b/drivers/hwmon/pmbus/Kconfig @@ -317,6 +317,15 @@ config SENSORS_PIM4328 This driver can also be built as a module. If so, the module will be called pim4328. +config SENSORS_PLI1209BC + tristate "Vicor PLI1209BC" + help + If you say yes here you get hardware monitoring support for Vicor + PLI1209BC Digital Supervisor. + + This driver can also be built as a module. If so, the module will + be called pli1209bc. 
+ config SENSORS_PM6764TR tristate "ST PM6764TR" help diff --git a/drivers/hwmon/pmbus/Makefile b/drivers/hwmon/pmbus/Makefile index e5935f70c9e01..a4a96ac71de79 100644 --- a/drivers/hwmon/pmbus/Makefile +++ b/drivers/hwmon/pmbus/Makefile @@ -33,6 +33,7 @@ obj-$(CONFIG_SENSORS_MAX8688) += max8688.o obj-$(CONFIG_SENSORS_MP2888) += mp2888.o obj-$(CONFIG_SENSORS_MP2975) += mp2975.o obj-$(CONFIG_SENSORS_MP5023) += mp5023.o +obj-$(CONFIG_SENSORS_PLI1209BC) += pli1209bc.o obj-$(CONFIG_SENSORS_PM6764TR) += pm6764tr.o obj-$(CONFIG_SENSORS_PXE1610) += pxe1610.o obj-$(CONFIG_SENSORS_Q54SJ108A2) += q54sj108a2.o diff --git a/drivers/hwmon/pmbus/pli1209bc.c b/drivers/hwmon/pmbus/pli1209bc.c new file mode 100644 index 0000000000000..5f8847307e559 --- /dev/null +++ b/drivers/hwmon/pmbus/pli1209bc.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Hardware monitoring driver for Vicor PLI1209BC Digital Supervisor + * + * Copyright (c) 2022 9elements GmbH + */ + +#include +#include +#include +#include "pmbus.h" + +/* + * The capability command is only supported at page 0. Probing the device while + * the page register is set to 1 will falsely enable PEC support. Disable + * capability probing accordingly, since the PLI1209BC does not have any + * additional capabilities. 
+ */ +static struct pmbus_platform_data pli1209bc_plat_data = { + .flags = PMBUS_NO_CAPABILITY, +}; + +static int pli1209bc_read_word_data(struct i2c_client *client, int page, + int phase, int reg) +{ + int data; + + switch (reg) { + /* PMBUS_READ_POUT uses a direct format with R=0 */ + case PMBUS_READ_POUT: + data = pmbus_read_word_data(client, page, phase, reg); + if (data < 0) + return data; + data = sign_extend32(data, 15) * 10; + return clamp_val(data, -32768, 32767) & 0xffff; + default: + return -ENODATA; + } +} + +static struct pmbus_driver_info pli1209bc_info = { + .pages = 2, + .format[PSC_VOLTAGE_IN] = direct, + .format[PSC_VOLTAGE_OUT] = direct, + .format[PSC_CURRENT_IN] = direct, + .format[PSC_CURRENT_OUT] = direct, + .format[PSC_POWER] = direct, + .format[PSC_TEMPERATURE] = direct, + .m[PSC_VOLTAGE_IN] = 1, + .b[PSC_VOLTAGE_IN] = 0, + .R[PSC_VOLTAGE_IN] = 1, + .m[PSC_VOLTAGE_OUT] = 1, + .b[PSC_VOLTAGE_OUT] = 0, + .R[PSC_VOLTAGE_OUT] = 1, + .m[PSC_CURRENT_IN] = 1, + .b[PSC_CURRENT_IN] = 0, + .R[PSC_CURRENT_IN] = 3, + .m[PSC_CURRENT_OUT] = 1, + .b[PSC_CURRENT_OUT] = 0, + .R[PSC_CURRENT_OUT] = 2, + .m[PSC_POWER] = 1, + .b[PSC_POWER] = 0, + .R[PSC_POWER] = 1, + .m[PSC_TEMPERATURE] = 1, + .b[PSC_TEMPERATURE] = 0, + .R[PSC_TEMPERATURE] = 0, + /* + * Page 0 sums up all attributes except voltage readings. + * The pli1209 digital supervisor only contains a single BCM, making + * page 0 redundant. 
+ */ + .func[1] = PMBUS_HAVE_VIN | PMBUS_HAVE_VOUT + | PMBUS_HAVE_IIN | PMBUS_HAVE_IOUT + | PMBUS_HAVE_PIN | PMBUS_HAVE_POUT + | PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP + | PMBUS_HAVE_STATUS_IOUT | PMBUS_HAVE_STATUS_INPUT, + .read_word_data = pli1209bc_read_word_data, +}; + +static int pli1209bc_probe(struct i2c_client *client) +{ + client->dev.platform_data = &pli1209bc_plat_data; + return pmbus_do_probe(client, &pli1209bc_info); +} + +static const struct i2c_device_id pli1209bc_id[] = { + {"pli1209bc", 0}, + {} +}; + +MODULE_DEVICE_TABLE(i2c, pli1209bc_id); + +#ifdef CONFIG_OF +static const struct of_device_id pli1209bc_of_match[] = { + { .compatible = "vicor,pli1209bc" }, + { }, +}; +MODULE_DEVICE_TABLE(of, pli1209bc_of_match); +#endif + +static struct i2c_driver pli1209bc_driver = { + .driver = { + .name = "pli1209bc", + .of_match_table = of_match_ptr(pli1209bc_of_match), + }, + .probe_new = pli1209bc_probe, + .id_table = pli1209bc_id, +}; + +module_i2c_driver(pli1209bc_driver); + +MODULE_AUTHOR("Marcello Sylvester Bauer "); +MODULE_DESCRIPTION("PMBus driver for Vicor PLI1209BC"); +MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(PMBUS); -- GitLab From ba1d263af1c034baf479bca14d6f715b713214b1 Mon Sep 17 00:00:00 2001 From: Marcello Sylvester Bauer Date: Mon, 21 Feb 2022 10:42:07 +0100 Subject: [PATCH 0777/1586] hwmon: (pmbus/pli1209bc) Add regulator support Add regulator support for PLI1209BC Digital Supervisor. 
Signed-off-by: Marcello Sylvester Bauer Link: https://lore.kernel.org/r/21b0cdb6dd72654effa451d3b1636ecd07b160e9.1645435888.git.sylv@sylv.io Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/Kconfig | 7 +++++++ drivers/hwmon/pmbus/pli1209bc.c | 31 +++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/drivers/hwmon/pmbus/Kconfig b/drivers/hwmon/pmbus/Kconfig index f18f67a946974..6552467c588d1 100644 --- a/drivers/hwmon/pmbus/Kconfig +++ b/drivers/hwmon/pmbus/Kconfig @@ -326,6 +326,13 @@ config SENSORS_PLI1209BC This driver can also be built as a module. If so, the module will be called pli1209bc. +config SENSORS_PLI1209BC_REGULATOR + bool "Regulator support for PLI1209BC" + depends on SENSORS_PLI1209BC && REGULATOR + help + If you say yes here you get regulator support for Vicor PLI1209BC + Digital Supervisor. + config SENSORS_PM6764TR tristate "ST PM6764TR" help diff --git a/drivers/hwmon/pmbus/pli1209bc.c b/drivers/hwmon/pmbus/pli1209bc.c index 5f8847307e559..05b4ee35ba270 100644 --- a/drivers/hwmon/pmbus/pli1209bc.c +++ b/drivers/hwmon/pmbus/pli1209bc.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "pmbus.h" /* @@ -33,11 +34,37 @@ static int pli1209bc_read_word_data(struct i2c_client *client, int page, return data; data = sign_extend32(data, 15) * 10; return clamp_val(data, -32768, 32767) & 0xffff; + /* + * PMBUS_READ_VOUT and PMBUS_READ_TEMPERATURE_1 return invalid data + * when the BCM is turned off. Since it is not possible to return + * ENODATA error, return zero instead. 
+ */ + case PMBUS_READ_VOUT: + case PMBUS_READ_TEMPERATURE_1: + data = pmbus_read_word_data(client, page, phase, + PMBUS_STATUS_WORD); + if (data < 0) + return data; + if (data & PB_STATUS_POWER_GOOD_N) + return 0; + return pmbus_read_word_data(client, page, phase, reg); default: return -ENODATA; } } +#if IS_ENABLED(CONFIG_SENSORS_PLI1209BC_REGULATOR) +static const struct regulator_desc pli1209bc_reg_desc = { + .name = "vout2", + .id = 1, + .of_match = of_match_ptr("vout2"), + .regulators_node = of_match_ptr("regulators"), + .ops = &pmbus_regulator_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, +}; +#endif + static struct pmbus_driver_info pli1209bc_info = { .pages = 2, .format[PSC_VOLTAGE_IN] = direct, @@ -75,6 +102,10 @@ static struct pmbus_driver_info pli1209bc_info = { | PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP | PMBUS_HAVE_STATUS_IOUT | PMBUS_HAVE_STATUS_INPUT, .read_word_data = pli1209bc_read_word_data, +#if IS_ENABLED(CONFIG_SENSORS_PLI1209BC_REGULATOR) + .num_regulators = 1, + .reg_desc = &pli1209bc_reg_desc, +#endif }; static int pli1209bc_probe(struct i2c_client *client) -- GitLab From 64b631fb0c6f7e5fbbe0d641556e07e7a8a272ef Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Fri, 18 Feb 2022 10:06:42 +0100 Subject: [PATCH 0778/1586] dt-bindings: Add ti,tmp125 temperature sensor binding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From the freely available Texas Instruments' TMP125 datasheet: "The TMP125 is an SPI-compatible temperature sensor available in the tiny SOT23-6 package. Requiring no external components, the TMP125 is capable of measuring temperatures within 2 degree C of accuracy over a temperature range of −25 degree C to +85 degree C and 2.5 degree C of accuracy over −40 degree C to +125 degree C." The TMP125 is very similar to the TMP121/TMP122 series of familiar chips. 
Signed-off-by: Christian Lamparter Acked-by: Krzysztof Kozlowski Acked-by: Rob Herring Link: https://lore.kernel.org/r/d3538ba9beededfe3a9ad5dab4903a6a01834822.1645175187.git.chunkeey@gmail.com Signed-off-by: Guenter Roeck --- Documentation/devicetree/bindings/trivial-devices.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/trivial-devices.yaml b/Documentation/devicetree/bindings/trivial-devices.yaml index d03d90360aa09..1c49bbc654eed 100644 --- a/Documentation/devicetree/bindings/trivial-devices.yaml +++ b/Documentation/devicetree/bindings/trivial-devices.yaml @@ -337,6 +337,7 @@ properties: # Thermometer with SPI interface - ti,tmp121 - ti,tmp122 + - ti,tmp125 # Digital Temperature Sensor - ti,tmp275 # TI DC-DC converter on PMBus -- GitLab From cd929672a9ef644aca12de59a75de5f061d5983d Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Fri, 18 Feb 2022 10:06:43 +0100 Subject: [PATCH 0779/1586] hwmon: (lm70) Add ti,tmp125 support The TMP125 is a 2 degree Celsius accurate Digital Temperature Sensor with a SPI interface. The temperature register is a 16-bit, read-only register. The MSB (Bit 15) is a leading zero and never set. Bits 14 to 5 are the 1+9 temperature data bits in a two's complement format. Bits 4 to 0 are useless copies of Bit 5 value and therefore ignored. This was tested on a Aerohive HiveAP-350. Bonus: lm70 supports TMP122/TMP124 as well. I added them to the Kconfig module description. 
Signed-off-by: Christian Lamparter Link: https://lore.kernel.org/r/43b19cbd4e7f51e9509e561b02b5d8d0e7079fac.1645175187.git.chunkeey@gmail.com Signed-off-by: Guenter Roeck --- Documentation/hwmon/lm70.rst | 7 +++++++ drivers/hwmon/Kconfig | 4 ++-- drivers/hwmon/lm70.c | 16 ++++++++++++++++ 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/Documentation/hwmon/lm70.rst b/Documentation/hwmon/lm70.rst index 6ddc5b67ccb54..11303a7e16a80 100644 --- a/Documentation/hwmon/lm70.rst +++ b/Documentation/hwmon/lm70.rst @@ -15,6 +15,10 @@ Supported chips: Information: https://www.ti.com/product/tmp122 + * Texas Instruments TMP125 + + Information: https://www.ti.com/product/tmp125 + * National Semiconductor LM71 Datasheet: https://www.ti.com/product/LM71 @@ -53,6 +57,9 @@ The LM74 and TMP121/TMP122/TMP123/TMP124 are very similar; main difference is The TMP122/TMP124 also feature configurable temperature thresholds. +The TMP125 is less accurate and provides 10-bit temperature data +with 0.25 degrees Celsius resolution. + The LM71 is also very similar; main difference is 14-bit temperature data (0.03125 degrees celsius resolution). diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index b3597ba66ddb9..463d5b110a85f 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -1224,8 +1224,8 @@ config SENSORS_LM70 depends on SPI_MASTER help If you say yes here you get support for the National Semiconductor - LM70, LM71, LM74 and Texas Instruments TMP121/TMP123 digital tempera- - ture sensor chips. + LM70, LM71, LM74 and Texas Instruments TMP121/TMP123, TMP122/TMP124, + TMP125 digital temperature sensor chips. This driver can also be built as a module. If so, the module will be called lm70. 
diff --git a/drivers/hwmon/lm70.c b/drivers/hwmon/lm70.c index d2a60de5b8de9..c20a749fc7f21 100644 --- a/drivers/hwmon/lm70.c +++ b/drivers/hwmon/lm70.c @@ -34,6 +34,7 @@ #define LM70_CHIP_LM71 2 /* NS LM71 */ #define LM70_CHIP_LM74 3 /* NS LM74 */ #define LM70_CHIP_TMP122 4 /* TI TMP122/TMP124 */ +#define LM70_CHIP_TMP125 5 /* TI TMP125 */ struct lm70 { struct spi_device *spi; @@ -87,6 +88,12 @@ static ssize_t temp1_input_show(struct device *dev, * LM71: * 14 bits of 2's complement data, discard LSB 2 bits, * resolution 0.0312 degrees celsius. + * + * TMP125: + * MSB/D15 is a leading zero. D14 is the sign-bit. This is + * followed by 9 temperature bits (D13..D5) in 2's complement + * data format with a resolution of 0.25 degrees celsius per unit. + * LSB 5 bits (D4..D0) share the same value as D5 and get discarded. */ switch (p_lm70->chip) { case LM70_CHIP_LM70: @@ -102,6 +109,10 @@ static ssize_t temp1_input_show(struct device *dev, case LM70_CHIP_LM71: val = ((int)raw / 4) * 3125 / 100; break; + + case LM70_CHIP_TMP125: + val = (sign_extend32(raw, 14) / 32) * 250; + break; } status = sprintf(buf, "%d\n", val); /* millidegrees Celsius */ @@ -135,6 +146,10 @@ static const struct of_device_id lm70_of_ids[] = { .compatible = "ti,tmp122", .data = (void *) LM70_CHIP_TMP122, }, + { + .compatible = "ti,tmp125", + .data = (void *) LM70_CHIP_TMP125, + }, { .compatible = "ti,lm71", .data = (void *) LM70_CHIP_LM71, @@ -184,6 +199,7 @@ static const struct spi_device_id lm70_ids[] = { { "lm70", LM70_CHIP_LM70 }, { "tmp121", LM70_CHIP_TMP121 }, { "tmp122", LM70_CHIP_TMP122 }, + { "tmp125", LM70_CHIP_TMP125 }, { "lm71", LM70_CHIP_LM71 }, { "lm74", LM70_CHIP_LM74 }, { }, -- GitLab From e75d16e58467c5703821e12536c7dc438f3c425d Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Thu, 24 Feb 2022 07:12:09 +0100 Subject: [PATCH 0780/1586] hwmon: (core) Add support for pwm auto channels attribute pwm[1-*]_auto_channels_temp is documented as an official hwmon sysfs attribute, yet there is 
no support for it in the new with_info-API. Fix that. Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20220224061210.16452-2-W_Armin@gmx.de Signed-off-by: Guenter Roeck --- drivers/hwmon/hwmon.c | 1 + include/linux/hwmon.h | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c index 0d6c6809f26cc..989e2c8496dd2 100644 --- a/drivers/hwmon/hwmon.c +++ b/drivers/hwmon/hwmon.c @@ -604,6 +604,7 @@ static const char * const hwmon_pwm_attr_templates[] = { [hwmon_pwm_enable] = "pwm%d_enable", [hwmon_pwm_mode] = "pwm%d_mode", [hwmon_pwm_freq] = "pwm%d_freq", + [hwmon_pwm_auto_channels_temp] = "pwm%d_auto_channels_temp", }; static const char * const hwmon_intrusion_attr_templates[] = { diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index fad1f1df26df7..eba380b76d157 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -332,12 +332,14 @@ enum hwmon_pwm_attributes { hwmon_pwm_enable, hwmon_pwm_mode, hwmon_pwm_freq, + hwmon_pwm_auto_channels_temp, }; #define HWMON_PWM_INPUT BIT(hwmon_pwm_input) #define HWMON_PWM_ENABLE BIT(hwmon_pwm_enable) #define HWMON_PWM_MODE BIT(hwmon_pwm_mode) #define HWMON_PWM_FREQ BIT(hwmon_pwm_freq) +#define HWMON_PWM_AUTO_CHANNELS_TEMP BIT(hwmon_pwm_auto_channels_temp) enum hwmon_intrusion_attributes { hwmon_intrusion_alarm, -- GitLab From aa9f833dfc128169a1162261c5641aa516b4a231 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Thu, 24 Feb 2022 07:12:10 +0100 Subject: [PATCH 0781/1586] hwmon: (sch5627) Add pwmX_auto_channels_temp support After doing some research, it seems that Fujitsu's hardware monitoring solution exports data describing which temperature sensors affect which fans, similar to the data in fan_source of the ftsteutates driver. Writing 0 into these registers forces the fans to full speed. Export this data with standard attributes. 
Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20220224061210.16452-3-W_Armin@gmx.de Signed-off-by: Guenter Roeck --- Documentation/hwmon/sch5627.rst | 4 +++ drivers/hwmon/sch5627.c | 61 +++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/Documentation/hwmon/sch5627.rst b/Documentation/hwmon/sch5627.rst index 187682e99114f..ecb4fc84d0451 100644 --- a/Documentation/hwmon/sch5627.rst +++ b/Documentation/hwmon/sch5627.rst @@ -20,6 +20,10 @@ Description SMSC SCH5627 Super I/O chips include complete hardware monitoring capabilities. They can monitor up to 5 voltages, 4 fans and 8 temperatures. +In addition, the SCH5627 exports data describing which temperature sensors +affect the speed of each fan. Setting pwmX_auto_channels_temp to 0 forces +the corresponding fan to full speed until another value is written. + The SMSC SCH5627 hardware monitoring part also contains an integrated watchdog. In order for this watchdog to function some motherboard specific initialization most be done by the BIOS, so if the watchdog is not enabled diff --git a/drivers/hwmon/sch5627.c b/drivers/hwmon/sch5627.c index 72c3f6757e348..25fbbd4c9a2b3 100644 --- a/drivers/hwmon/sch5627.c +++ b/drivers/hwmon/sch5627.c @@ -52,6 +52,9 @@ static const u16 SCH5627_REG_FAN[SCH5627_NO_FANS] = { static const u16 SCH5627_REG_FAN_MIN[SCH5627_NO_FANS] = { 0x62, 0x64, 0x66, 0x68 }; +static const u16 SCH5627_REG_PWM_MAP[SCH5627_NO_FANS] = { + 0xA0, 0xA1, 0xA2, 0xA3 }; + static const u16 SCH5627_REG_IN_MSB[SCH5627_NO_IN] = { 0x22, 0x23, 0x24, 0x25, 0x189 }; static const u16 SCH5627_REG_IN_LSN[SCH5627_NO_IN] = { @@ -223,6 +226,9 @@ static int reg_to_rpm(u16 reg) static umode_t sch5627_is_visible(const void *drvdata, enum hwmon_sensor_types type, u32 attr, int channel) { + if (type == hwmon_pwm && attr == hwmon_pwm_auto_channels_temp) + return 0644; + return 0444; } @@ -278,6 +284,23 @@ static int sch5627_read(struct device *dev, enum hwmon_sensor_types type, u32 at break; } 
break; + case hwmon_pwm: + switch (attr) { + case hwmon_pwm_auto_channels_temp: + mutex_lock(&data->update_lock); + ret = sch56xx_read_virtual_reg(data->addr, SCH5627_REG_PWM_MAP[channel]); + mutex_unlock(&data->update_lock); + + if (ret < 0) + return ret; + + *val = ret; + + return 0; + default: + break; + } + break; case hwmon_in: ret = sch5627_update_in(data); if (ret < 0) @@ -318,10 +341,42 @@ static int sch5627_read_string(struct device *dev, enum hwmon_sensor_types type, return -EOPNOTSUPP; } +static int sch5627_write(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, + long val) +{ + struct sch5627_data *data = dev_get_drvdata(dev); + int ret; + + switch (type) { + case hwmon_pwm: + switch (attr) { + case hwmon_pwm_auto_channels_temp: + /* registers are 8 bit wide */ + if (val > U8_MAX || val < 0) + return -EINVAL; + + mutex_lock(&data->update_lock); + ret = sch56xx_write_virtual_reg(data->addr, SCH5627_REG_PWM_MAP[channel], + val); + mutex_unlock(&data->update_lock); + + return ret; + default: + break; + } + break; + default: + break; + } + + return -EOPNOTSUPP; +} + static const struct hwmon_ops sch5627_ops = { .is_visible = sch5627_is_visible, .read = sch5627_read, .read_string = sch5627_read_string, + .write = sch5627_write, }; static const struct hwmon_channel_info *sch5627_info[] = { @@ -342,6 +397,12 @@ static const struct hwmon_channel_info *sch5627_info[] = { HWMON_F_INPUT | HWMON_F_MIN | HWMON_F_FAULT, HWMON_F_INPUT | HWMON_F_MIN | HWMON_F_FAULT ), + HWMON_CHANNEL_INFO(pwm, + HWMON_PWM_AUTO_CHANNELS_TEMP, + HWMON_PWM_AUTO_CHANNELS_TEMP, + HWMON_PWM_AUTO_CHANNELS_TEMP, + HWMON_PWM_AUTO_CHANNELS_TEMP + ), HWMON_CHANNEL_INFO(in, HWMON_I_INPUT | HWMON_I_LABEL, HWMON_I_INPUT | HWMON_I_LABEL, -- GitLab From 2fd3eec19c6e0a2c218853db9df27d4e74921673 Mon Sep 17 00:00:00 2001 From: Aleksa Savic Date: Sun, 27 Feb 2022 21:56:25 +0100 Subject: [PATCH 0782/1586] hwmon: (aquacomputer_d5next) Add support for Aquacomputer Farbwerk 360 Extend 
aquacomputer_d5next driver to expose hardware temperature sensors of the Aquacomputer Farbwerk 360 RGB controller, which communicates through a proprietary USB HID protocol. Four temperature sensors are available. Additionally, serial number and firmware version are exposed through debugfs. This driver has been tested on x86_64. Signed-off-by: Aleksa Savic Signed-off-by: Guenter Roeck --- Documentation/hwmon/aquacomputer_d5next.rst | 49 ++- drivers/hwmon/aquacomputer_d5next.c | 379 +++++++++++++------- 2 files changed, 278 insertions(+), 150 deletions(-) diff --git a/Documentation/hwmon/aquacomputer_d5next.rst b/Documentation/hwmon/aquacomputer_d5next.rst index 1f4bb4ba2e4bf..3373e27b707d6 100644 --- a/Documentation/hwmon/aquacomputer_d5next.rst +++ b/Documentation/hwmon/aquacomputer_d5next.rst @@ -6,22 +6,21 @@ Kernel driver aquacomputer-d5next Supported devices: * Aquacomputer D5 Next watercooling pump +* Aquacomputer Farbwerk 360 RGB controller Author: Aleksa Savic Description ----------- -This driver exposes hardware sensors of the Aquacomputer D5 Next watercooling -pump, which communicates through a proprietary USB HID protocol. +This driver exposes hardware sensors of listed Aquacomputer devices, which +communicate through proprietary USB HID protocols. -Available sensors are pump and fan speed, power, voltage and current, as -well as coolant temperature. Also available through debugfs are the serial -number, firmware version and power-on count. - -Attaching a fan is optional and allows it to be controlled using temperature -curves directly from the pump. If it's not connected, the fan-related sensors -will report zeroes. +For the D5 Next pump, available sensors are pump and fan speed, power, voltage +and current, as well as coolant temperature. Also available through debugfs are +the serial number, firmware version and power-on count. Attaching a fan to it is +optional and allows it to be controlled using temperature curves directly from the +pump. 
If it's not connected, the fan-related sensors will report zeroes. The pump can be configured either through software or via its physical interface. Configuring the pump through this driver is not implemented, as it @@ -29,33 +28,31 @@ seems to require sending it a complete configuration. That includes addressable RGB LEDs, for which there is no standard sysfs interface. Thus, that task is better suited for userspace tools. +The Farbwerk 360 exposes four temperature sensors. Depending on the device, +not all sysfs and debugfs entries will be available. + Usage notes ----------- -The pump communicates via HID reports. The driver is loaded automatically by +The devices communicate via HID reports. The driver is loaded automatically by the kernel and supports hotswapping. Sysfs entries ------------- -============ ============================================= -temp1_input Coolant temperature (in millidegrees Celsius) -fan1_input Pump speed (in RPM) -fan2_input Fan speed (in RPM) -power1_input Pump power (in micro Watts) -power2_input Fan power (in micro Watts) -in0_input Pump voltage (in milli Volts) -in1_input Fan voltage (in milli Volts) -in2_input +5V rail voltage (in milli Volts) -curr1_input Pump current (in milli Amperes) -curr2_input Fan current (in milli Amperes) -============ ============================================= +================ ============================================= +temp[1-4]_input Temperature sensors (in millidegrees Celsius) +fan[1-2]_input Pump/fan speed (in RPM) +power[1-2]_input Pump/fan power (in micro Watts) +in[0-2]_input Pump/fan voltage (in milli Volts) +curr[1-2]_input Pump/fan current (in milli Amperes) +================ ============================================= Debugfs entries --------------- -================ =============================================== -serial_number Serial number of the pump +================ ================================================= +serial_number Serial number of the device firmware_version 
Version of installed firmware -power_cycles Count of how many times the pump was powered on -================ =============================================== +power_cycles Count of how many times the device was powered on +================ ================================================= diff --git a/drivers/hwmon/aquacomputer_d5next.c b/drivers/hwmon/aquacomputer_d5next.c index fb9341a530511..525809cf7c952 100644 --- a/drivers/hwmon/aquacomputer_d5next.c +++ b/drivers/hwmon/aquacomputer_d5next.c @@ -1,32 +1,41 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * hwmon driver for Aquacomputer D5 Next watercooling pump + * hwmon driver for Aquacomputer devices (D5 Next, Farbwerk 360) * - * The D5 Next sends HID reports (with ID 0x01) every second to report sensor values - * (coolant temperature, pump and fan speed, voltage, current and power). It responds to - * Get_Report requests, but returns a dummy value of no use. + * Aquacomputer devices send HID reports (with ID 0x01) every second to report + * sensor values. 
* * Copyright 2021 Aleksa Savic */ -#include #include #include #include #include #include #include +#include -#define DRIVER_NAME "aquacomputer-d5next" +#define USB_VENDOR_ID_AQUACOMPUTER 0x0c70 +#define USB_PRODUCT_ID_D5NEXT 0xf00e +#define USB_PRODUCT_ID_FARBWERK360 0xf010 -#define D5NEXT_STATUS_REPORT_ID 0x01 -#define D5NEXT_STATUS_UPDATE_INTERVAL (2 * HZ) /* In seconds */ +enum kinds { d5next, farbwerk360 }; -/* Register offsets for the D5 Next pump */ +static const char *const aqc_device_names[] = { + [d5next] = "d5next", + [farbwerk360] = "farbwerk360" +}; -#define D5NEXT_SERIAL_FIRST_PART 3 -#define D5NEXT_SERIAL_SECOND_PART 5 -#define D5NEXT_FIRMWARE_VERSION 13 +#define DRIVER_NAME "aquacomputer_d5next" + +#define STATUS_REPORT_ID 0x01 +#define STATUS_UPDATE_INTERVAL (2 * HZ) /* In seconds */ +#define SERIAL_FIRST_PART 3 +#define SERIAL_SECOND_PART 5 +#define FIRMWARE_VERSION 13 + +/* Register offsets for the D5 Next pump */ #define D5NEXT_POWER_CYCLES 24 #define D5NEXT_COOLANT_TEMP 87 @@ -44,76 +53,118 @@ #define D5NEXT_PUMP_CURRENT 112 #define D5NEXT_FAN_CURRENT 99 -/* Labels for provided values */ +/* Register offsets for the Farbwerk 360 RGB controller */ +#define FARBWERK360_NUM_SENSORS 4 +#define FARBWERK360_SENSOR_START 0x32 +#define FARBWERK360_SENSOR_SIZE 0x02 +#define FARBWERK360_SENSOR_DISCONNECTED 0x7FFF -#define L_COOLANT_TEMP "Coolant temp" +/* Labels for D5 Next */ +#define L_D5NEXT_COOLANT_TEMP "Coolant temp" -#define L_PUMP_SPEED "Pump speed" -#define L_FAN_SPEED "Fan speed" - -#define L_PUMP_POWER "Pump power" -#define L_FAN_POWER "Fan power" - -#define L_PUMP_VOLTAGE "Pump voltage" -#define L_FAN_VOLTAGE "Fan voltage" -#define L_5V_VOLTAGE "+5V voltage" - -#define L_PUMP_CURRENT "Pump current" -#define L_FAN_CURRENT "Fan current" +static const char *const label_d5next_speeds[] = { + "Pump speed", + "Fan speed" +}; -static const char *const label_speeds[] = { - L_PUMP_SPEED, - L_FAN_SPEED, +static const char *const label_d5next_power[] = { 
+ "Pump power", + "Fan power" }; -static const char *const label_power[] = { - L_PUMP_POWER, - L_FAN_POWER, +static const char *const label_d5next_voltages[] = { + "Pump voltage", + "Fan voltage", + "+5V voltage" }; -static const char *const label_voltages[] = { - L_PUMP_VOLTAGE, - L_FAN_VOLTAGE, - L_5V_VOLTAGE, +static const char *const label_d5next_current[] = { + "Pump current", + "Fan current" }; -static const char *const label_current[] = { - L_PUMP_CURRENT, - L_FAN_CURRENT, +/* Labels for Farbwerk 360 temperature sensors */ +static const char *const label_temp_sensors[] = { + "Sensor 1", + "Sensor 2", + "Sensor 3", + "Sensor 4" }; -struct d5next_data { +struct aqc_data { struct hid_device *hdev; struct device *hwmon_dev; struct dentry *debugfs; - s32 temp_input; + enum kinds kind; + const char *name; + + /* General info, same across all devices */ + u32 serial_number[2]; + u16 firmware_version; + + /* D5 Next specific - how many times the device was powered on */ + u32 power_cycles; + + /* Sensor values */ + s32 temp_input[4]; u16 speed_input[2]; u32 power_input[2]; u16 voltage_input[3]; u16 current_input[2]; - u32 serial_number[2]; - u16 firmware_version; - u32 power_cycles; /* How many times the device was powered on */ + unsigned long updated; }; -static umode_t d5next_is_visible(const void *data, enum hwmon_sensor_types type, u32 attr, - int channel) +static umode_t aqc_is_visible(const void *data, enum hwmon_sensor_types type, u32 attr, + int channel) { - return 0444; + const struct aqc_data *priv = data; + + switch (type) { + case hwmon_temp: + switch (priv->kind) { + case d5next: + if (channel == 0) + return 0444; + break; + case farbwerk360: + return 0444; + default: + break; + } + break; + case hwmon_fan: + case hwmon_power: + case hwmon_in: + case hwmon_curr: + switch (priv->kind) { + case d5next: + return 0444; + default: + break; + } + break; + default: + break; + } + + return 0; } -static int d5next_read(struct device *dev, enum 
hwmon_sensor_types type, u32 attr, int channel, - long *val) +static int aqc_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, + int channel, long *val) { - struct d5next_data *priv = dev_get_drvdata(dev); + struct aqc_data *priv = dev_get_drvdata(dev); - if (time_after(jiffies, priv->updated + D5NEXT_STATUS_UPDATE_INTERVAL)) + if (time_after(jiffies, priv->updated + STATUS_UPDATE_INTERVAL)) return -ENODATA; switch (type) { case hwmon_temp: - *val = priv->temp_input; + if (priv->temp_input[channel] == -ENODATA) + return -ENODATA; + + *val = priv->temp_input[channel]; break; case hwmon_fan: *val = priv->speed_input[channel]; @@ -134,24 +185,59 @@ static int d5next_read(struct device *dev, enum hwmon_sensor_types type, u32 att return 0; } -static int d5next_read_string(struct device *dev, enum hwmon_sensor_types type, u32 attr, - int channel, const char **str) +static int aqc_read_string(struct device *dev, enum hwmon_sensor_types type, u32 attr, + int channel, const char **str) { + struct aqc_data *priv = dev_get_drvdata(dev); + switch (type) { case hwmon_temp: - *str = L_COOLANT_TEMP; + switch (priv->kind) { + case d5next: + *str = L_D5NEXT_COOLANT_TEMP; + break; + case farbwerk360: + *str = label_temp_sensors[channel]; + break; + default: + break; + } break; case hwmon_fan: - *str = label_speeds[channel]; + switch (priv->kind) { + case d5next: + *str = label_d5next_speeds[channel]; + break; + default: + break; + } break; case hwmon_power: - *str = label_power[channel]; + switch (priv->kind) { + case d5next: + *str = label_d5next_power[channel]; + break; + default: + break; + } break; case hwmon_in: - *str = label_voltages[channel]; + switch (priv->kind) { + case d5next: + *str = label_d5next_voltages[channel]; + break; + default: + break; + } break; case hwmon_curr: - *str = label_current[channel]; + switch (priv->kind) { + case d5next: + *str = label_d5next_current[channel]; + break; + default: + break; + } break; default: return -EOPNOTSUPP; @@ 
-160,60 +246,89 @@ static int d5next_read_string(struct device *dev, enum hwmon_sensor_types type, return 0; } -static const struct hwmon_ops d5next_hwmon_ops = { - .is_visible = d5next_is_visible, - .read = d5next_read, - .read_string = d5next_read_string, +static const struct hwmon_ops aqc_hwmon_ops = { + .is_visible = aqc_is_visible, + .read = aqc_read, + .read_string = aqc_read_string, }; -static const struct hwmon_channel_info *d5next_info[] = { - HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_LABEL), - HWMON_CHANNEL_INFO(fan, HWMON_F_INPUT | HWMON_F_LABEL, HWMON_F_INPUT | HWMON_F_LABEL), - HWMON_CHANNEL_INFO(power, HWMON_P_INPUT | HWMON_P_LABEL, HWMON_P_INPUT | HWMON_P_LABEL), - HWMON_CHANNEL_INFO(in, HWMON_I_INPUT | HWMON_I_LABEL, HWMON_I_INPUT | HWMON_I_LABEL, +static const struct hwmon_channel_info *aqc_info[] = { + HWMON_CHANNEL_INFO(temp, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL), + HWMON_CHANNEL_INFO(fan, + HWMON_F_INPUT | HWMON_F_LABEL, + HWMON_F_INPUT | HWMON_F_LABEL), + HWMON_CHANNEL_INFO(power, + HWMON_P_INPUT | HWMON_P_LABEL, + HWMON_P_INPUT | HWMON_P_LABEL), + HWMON_CHANNEL_INFO(in, + HWMON_I_INPUT | HWMON_I_LABEL, + HWMON_I_INPUT | HWMON_I_LABEL, HWMON_I_INPUT | HWMON_I_LABEL), - HWMON_CHANNEL_INFO(curr, HWMON_C_INPUT | HWMON_C_LABEL, HWMON_C_INPUT | HWMON_C_LABEL), + HWMON_CHANNEL_INFO(curr, + HWMON_C_INPUT | HWMON_C_LABEL, + HWMON_C_INPUT | HWMON_C_LABEL), NULL }; -static const struct hwmon_chip_info d5next_chip_info = { - .ops = &d5next_hwmon_ops, - .info = d5next_info, +static const struct hwmon_chip_info aqc_chip_info = { + .ops = &aqc_hwmon_ops, + .info = aqc_info, }; -static int d5next_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) +static int aqc_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, + int size) { - struct d5next_data *priv; + int i, sensor_value; + struct aqc_data *priv; - if 
(report->id != D5NEXT_STATUS_REPORT_ID) + if (report->id != STATUS_REPORT_ID) return 0; priv = hid_get_drvdata(hdev); /* Info provided with every report */ - - priv->serial_number[0] = get_unaligned_be16(data + D5NEXT_SERIAL_FIRST_PART); - priv->serial_number[1] = get_unaligned_be16(data + D5NEXT_SERIAL_SECOND_PART); - - priv->firmware_version = get_unaligned_be16(data + D5NEXT_FIRMWARE_VERSION); - priv->power_cycles = get_unaligned_be32(data + D5NEXT_POWER_CYCLES); + priv->serial_number[0] = get_unaligned_be16(data + SERIAL_FIRST_PART); + priv->serial_number[1] = get_unaligned_be16(data + SERIAL_SECOND_PART); + priv->firmware_version = get_unaligned_be16(data + FIRMWARE_VERSION); /* Sensor readings */ + switch (priv->kind) { + case d5next: + priv->power_cycles = get_unaligned_be32(data + D5NEXT_POWER_CYCLES); - priv->temp_input = get_unaligned_be16(data + D5NEXT_COOLANT_TEMP) * 10; + priv->temp_input[0] = get_unaligned_be16(data + D5NEXT_COOLANT_TEMP) * 10; - priv->speed_input[0] = get_unaligned_be16(data + D5NEXT_PUMP_SPEED); - priv->speed_input[1] = get_unaligned_be16(data + D5NEXT_FAN_SPEED); + priv->speed_input[0] = get_unaligned_be16(data + D5NEXT_PUMP_SPEED); + priv->speed_input[1] = get_unaligned_be16(data + D5NEXT_FAN_SPEED); - priv->power_input[0] = get_unaligned_be16(data + D5NEXT_PUMP_POWER) * 10000; - priv->power_input[1] = get_unaligned_be16(data + D5NEXT_FAN_POWER) * 10000; + priv->power_input[0] = get_unaligned_be16(data + D5NEXT_PUMP_POWER) * 10000; + priv->power_input[1] = get_unaligned_be16(data + D5NEXT_FAN_POWER) * 10000; - priv->voltage_input[0] = get_unaligned_be16(data + D5NEXT_PUMP_VOLTAGE) * 10; - priv->voltage_input[1] = get_unaligned_be16(data + D5NEXT_FAN_VOLTAGE) * 10; - priv->voltage_input[2] = get_unaligned_be16(data + D5NEXT_5V_VOLTAGE) * 10; + priv->voltage_input[0] = get_unaligned_be16(data + D5NEXT_PUMP_VOLTAGE) * 10; + priv->voltage_input[1] = get_unaligned_be16(data + D5NEXT_FAN_VOLTAGE) * 10; + priv->voltage_input[2] = 
get_unaligned_be16(data + D5NEXT_5V_VOLTAGE) * 10; - priv->current_input[0] = get_unaligned_be16(data + D5NEXT_PUMP_CURRENT); - priv->current_input[1] = get_unaligned_be16(data + D5NEXT_FAN_CURRENT); + priv->current_input[0] = get_unaligned_be16(data + D5NEXT_PUMP_CURRENT); + priv->current_input[1] = get_unaligned_be16(data + D5NEXT_FAN_CURRENT); + break; + case farbwerk360: + /* Temperature sensor readings */ + for (i = 0; i < FARBWERK360_NUM_SENSORS; i++) { + sensor_value = get_unaligned_be16(data + FARBWERK360_SENSOR_START + + i * FARBWERK360_SENSOR_SIZE); + if (sensor_value == FARBWERK360_SENSOR_DISCONNECTED) + priv->temp_input[i] = -ENODATA; + else + priv->temp_input[i] = sensor_value * 10; + } + break; + default: + break; + } priv->updated = jiffies; @@ -224,7 +339,7 @@ static int d5next_raw_event(struct hid_device *hdev, struct hid_report *report, static int serial_number_show(struct seq_file *seqf, void *unused) { - struct d5next_data *priv = seqf->private; + struct aqc_data *priv = seqf->private; seq_printf(seqf, "%05u-%05u\n", priv->serial_number[0], priv->serial_number[1]); @@ -234,7 +349,7 @@ DEFINE_SHOW_ATTRIBUTE(serial_number); static int firmware_version_show(struct seq_file *seqf, void *unused) { - struct d5next_data *priv = seqf->private; + struct aqc_data *priv = seqf->private; seq_printf(seqf, "%u\n", priv->firmware_version); @@ -244,7 +359,7 @@ DEFINE_SHOW_ATTRIBUTE(firmware_version); static int power_cycles_show(struct seq_file *seqf, void *unused) { - struct d5next_data *priv = seqf->private; + struct aqc_data *priv = seqf->private; seq_printf(seqf, "%u\n", priv->power_cycles); @@ -252,29 +367,32 @@ static int power_cycles_show(struct seq_file *seqf, void *unused) } DEFINE_SHOW_ATTRIBUTE(power_cycles); -static void d5next_debugfs_init(struct d5next_data *priv) +static void aqc_debugfs_init(struct aqc_data *priv) { - char name[32]; + char name[64]; - scnprintf(name, sizeof(name), "%s-%s", DRIVER_NAME, dev_name(&priv->hdev->dev)); + 
scnprintf(name, sizeof(name), "%s_%s-%s", "aquacomputer", priv->name, + dev_name(&priv->hdev->dev)); priv->debugfs = debugfs_create_dir(name, NULL); debugfs_create_file("serial_number", 0444, priv->debugfs, priv, &serial_number_fops); debugfs_create_file("firmware_version", 0444, priv->debugfs, priv, &firmware_version_fops); - debugfs_create_file("power_cycles", 0444, priv->debugfs, priv, &power_cycles_fops); + + if (priv->kind == d5next) + debugfs_create_file("power_cycles", 0444, priv->debugfs, priv, &power_cycles_fops); } #else -static void d5next_debugfs_init(struct d5next_data *priv) +static void aqc_debugfs_init(struct aqc_data *priv) { } #endif -static int d5next_probe(struct hid_device *hdev, const struct hid_device_id *id) +static int aqc_probe(struct hid_device *hdev, const struct hid_device_id *id) { - struct d5next_data *priv; + struct aqc_data *priv; int ret; priv = devm_kzalloc(&hdev->dev, sizeof(*priv), GFP_KERNEL); @@ -284,7 +402,7 @@ static int d5next_probe(struct hid_device *hdev, const struct hid_device_id *id) priv->hdev = hdev; hid_set_drvdata(hdev, priv); - priv->updated = jiffies - D5NEXT_STATUS_UPDATE_INTERVAL; + priv->updated = jiffies - STATUS_UPDATE_INTERVAL; ret = hid_parse(hdev); if (ret) @@ -298,15 +416,28 @@ static int d5next_probe(struct hid_device *hdev, const struct hid_device_id *id) if (ret) goto fail_and_stop; - priv->hwmon_dev = hwmon_device_register_with_info(&hdev->dev, "d5next", priv, - &d5next_chip_info, NULL); + switch (hdev->product) { + case USB_PRODUCT_ID_D5NEXT: + priv->kind = d5next; + break; + case USB_PRODUCT_ID_FARBWERK360: + priv->kind = farbwerk360; + break; + default: + break; + } + + priv->name = aqc_device_names[priv->kind]; + + priv->hwmon_dev = hwmon_device_register_with_info(&hdev->dev, priv->name, priv, + &aqc_chip_info, NULL); if (IS_ERR(priv->hwmon_dev)) { ret = PTR_ERR(priv->hwmon_dev); goto fail_and_close; } - d5next_debugfs_init(priv); + aqc_debugfs_init(priv); return 0; @@ -317,9 +448,9 @@ 
fail_and_stop: return ret; } -static void d5next_remove(struct hid_device *hdev) +static void aqc_remove(struct hid_device *hdev) { - struct d5next_data *priv = hid_get_drvdata(hdev); + struct aqc_data *priv = hid_get_drvdata(hdev); debugfs_remove_recursive(priv->debugfs); hwmon_device_unregister(priv->hwmon_dev); @@ -328,36 +459,36 @@ static void d5next_remove(struct hid_device *hdev) hid_hw_stop(hdev); } -static const struct hid_device_id d5next_table[] = { - { HID_USB_DEVICE(0x0c70, 0xf00e) }, /* Aquacomputer D5 Next */ - {}, +static const struct hid_device_id aqc_table[] = { + { HID_USB_DEVICE(USB_VENDOR_ID_AQUACOMPUTER, USB_PRODUCT_ID_D5NEXT) }, + { HID_USB_DEVICE(USB_VENDOR_ID_AQUACOMPUTER, USB_PRODUCT_ID_FARBWERK360) }, + { } }; -MODULE_DEVICE_TABLE(hid, d5next_table); +MODULE_DEVICE_TABLE(hid, aqc_table); -static struct hid_driver d5next_driver = { +static struct hid_driver aqc_driver = { .name = DRIVER_NAME, - .id_table = d5next_table, - .probe = d5next_probe, - .remove = d5next_remove, - .raw_event = d5next_raw_event, + .id_table = aqc_table, + .probe = aqc_probe, + .remove = aqc_remove, + .raw_event = aqc_raw_event, }; -static int __init d5next_init(void) +static int __init aqc_init(void) { - return hid_register_driver(&d5next_driver); + return hid_register_driver(&aqc_driver); } -static void __exit d5next_exit(void) +static void __exit aqc_exit(void) { - hid_unregister_driver(&d5next_driver); + hid_unregister_driver(&aqc_driver); } /* Request to initialize after the HID bus to ensure it's not being loaded before */ - -late_initcall(d5next_init); -module_exit(d5next_exit); +late_initcall(aqc_init); +module_exit(aqc_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Aleksa Savic "); -MODULE_DESCRIPTION("Hwmon driver for Aquacomputer D5 Next pump"); +MODULE_DESCRIPTION("Hwmon driver for Aquacomputer devices"); -- GitLab From ee55c9c5734e34d7bf40c4f91d181622630626ff Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 7 Feb 2022 14:10:08 +0100 Subject: 
[PATCH 0783/1586] m68k: defconfig: Update defconfigs for v5.17-rc3 - Drop CONFIG_NFT_COUNTER=m (removed in commit 023223dfbfb34fcc ("netfilter: nf_tables: make counter support built-in")), - Drop CONFIG_NF_FLOW_TABLE_IPV4=m and CONFIG_NF_FLOW_TABLE_IPV6=m (can no longer be enabled since commit c42ba4290b2147aa ("netfilter: flowtable: remove ipv4/ipv6 modules")), - Enable the new option for legacy fbcon hardware acceleration code on all platforms that have a frame buffer device that can make use of it, - Drop CONFIG_TEST_HASH=m (replaced by auto-modular CONFIG_HASH_KUNIT_TEST in commit 0acc968f352336a4 ("test_hash.c: refactor into kunit")), - Enable modular build of the new siphash selftest. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20220123125543.2658445-1-geert@linux-m68k.org Link: https://lore.kernel.org/r/20220207131008.2202977-1-geert@linux-m68k.org --- arch/m68k/configs/amiga_defconfig | 6 ++---- arch/m68k/configs/apollo_defconfig | 6 ++---- arch/m68k/configs/atari_defconfig | 5 +---- arch/m68k/configs/bvme6000_defconfig | 5 +---- arch/m68k/configs/hp300_defconfig | 6 ++---- arch/m68k/configs/mac_defconfig | 5 +---- arch/m68k/configs/multi_defconfig | 6 ++---- arch/m68k/configs/mvme147_defconfig | 5 +---- arch/m68k/configs/mvme16x_defconfig | 5 +---- arch/m68k/configs/q40_defconfig | 5 +---- arch/m68k/configs/sun3_defconfig | 5 +---- arch/m68k/configs/sun3x_defconfig | 5 +---- 12 files changed, 16 insertions(+), 48 deletions(-) diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index bc9952f8be667..49f301c57df5f 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -104,7 +104,6 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -204,7 +203,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y 
-CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m @@ -229,7 +227,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -435,6 +432,7 @@ CONFIG_FB_AMIGA_ECS=y CONFIG_FB_AMIGA_AGA=y CONFIG_FB_FM2=y CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION=y CONFIG_LOGO=y CONFIG_SOUND=m CONFIG_DMASOUND_PAULA=m @@ -643,7 +641,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index a77269c6e5bac..405997b614472 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -100,7 +100,6 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -200,7 +199,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m @@ -225,7 +223,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -393,6 +390,7 @@ CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_VGA16 is not set # CONFIG_LOGO_LINUX_CLUT224 is not set @@ -599,7 +597,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m 
CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 7a74efa6b9a1a..eb342a33b73e8 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -107,7 +107,6 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -207,7 +206,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m @@ -232,7 +230,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -621,7 +618,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index a5323bf2eb333..e6de6b4dff86e 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -97,7 +97,6 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -197,7 +196,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m @@ -222,7 +220,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -592,7 +589,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m 
CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 5e80aa0869d54..048d9b114eb43 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -99,7 +99,6 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -199,7 +198,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m @@ -224,7 +222,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -395,6 +392,7 @@ CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_MONO is not set # CONFIG_LOGO_LINUX_VGA16 is not set @@ -601,7 +599,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index e84326a3f62db..4e5b32ba00dfc 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -98,7 +98,6 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -198,7 +197,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m 
@@ -223,7 +221,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -623,7 +620,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 337552f433390..7df61e743591a 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -118,7 +118,6 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -218,7 +217,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m @@ -243,7 +241,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -497,6 +494,7 @@ CONFIG_FB_ATARI=y CONFIG_FB_VALKYRIE=y CONFIG_FB_MAC=y CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION=y CONFIG_LOGO=y CONFIG_SOUND=m CONFIG_DMASOUND_ATARI=m @@ -708,7 +706,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 7b688f7d272a2..80922fe80d9de 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -96,7 +96,6 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m 
-CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -196,7 +195,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m @@ -221,7 +219,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -591,7 +588,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 7c2cb31d63dd8..530c4cf7c59ba 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -97,7 +97,6 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -197,7 +196,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m @@ -222,7 +220,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -592,7 +589,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index ca43897af26de..d3f371e490ec0 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -98,7 +98,6 @@ 
CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -198,7 +197,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m @@ -223,7 +221,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -610,7 +607,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index e3d515f37144a..f214f277c55a1 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -94,7 +94,6 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -194,7 +193,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m @@ -219,7 +217,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -593,7 +590,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index d601606c969b8..73b4c61394a2a 100644 --- 
a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -94,7 +94,6 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -194,7 +193,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m @@ -219,7 +217,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -593,7 +590,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m -- GitLab From 0d52a01a266ba21840c7cb02e1de39c503759dda Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 7 Feb 2022 14:11:23 +0100 Subject: [PATCH 0784/1586] m68k: defconfig: Disable fbdev on Sun3/3x Commit e019630e78e3482c ("[PATCH] remove broken video drivers") in v2.6.21 removed Sun3 framebuffer support, hence there are no more frame buffer device drivers available on the Sun3 and Sun3x platforms. 
Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20220207131123.2203141-1-geert@linux-m68k.org --- arch/m68k/configs/sun3_defconfig | 3 --- arch/m68k/configs/sun3x_defconfig | 3 --- 2 files changed, 6 deletions(-) diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index f214f277c55a1..db6769790bdb6 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -385,9 +385,6 @@ CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set -CONFIG_FB=y -CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_LOGO=y CONFIG_HID=m CONFIG_HIDRAW=y CONFIG_UHID=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index 73b4c61394a2a..e9c362683666a 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -384,9 +384,6 @@ CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set -CONFIG_FB=y -CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_LOGO=y CONFIG_HID=m CONFIG_HIDRAW=y CONFIG_UHID=m -- GitLab From e65b831a1e191caff3fc0d06bc7019cdaf8f868e Mon Sep 17 00:00:00 2001 From: Qinghua Jin Date: Fri, 7 Jan 2022 10:22:58 +0800 Subject: [PATCH 0785/1586] nvme-fc: fix a typo subsytem -> subsystem Signed-off-by: Qinghua Jin Signed-off-by: Christoph Hellwig --- include/linux/nvme-fc-driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index cb909edb76c48..5358a5facdee6 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -721,7 +721,7 @@ enum { * * Fields with static values for the port. Initialized by the * port_info struct supplied to the registration call. 
- * @port_num: NVME-FC transport subsytem port number + * @port_num: NVME-FC transport subsystem port number * @node_name: FC WWNN for the port * @port_name: FC WWPN for the port * @private: pointer to memory allocated alongside the local port -- GitLab From 50ab19d89feaf4ebeca6872b46da4b503eee20c1 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 18 Jan 2022 23:49:54 -0800 Subject: [PATCH 0786/1586] nvme-core: remove unnecessary semicolon It is not a good practice to have a semicolon at the end of the function definition. Remove it from nvme_pr_type(). Signed-off-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 5e0bfda04bd7b..a1d793e799825 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1979,7 +1979,7 @@ static char nvme_pr_type(enum pr_type type) default: return 0; } -}; +} static int nvme_send_ns_head_pr_command(struct block_device *bdev, struct nvme_command *c, u8 data[16]) -- GitLab From ba3266434d6615ff8015b01846d0e7756c9ad936 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Fri, 21 Jan 2022 21:05:39 -0800 Subject: [PATCH 0787/1586] nvme-core: remove unnecessary function parameter In function nvme_execute_rq() we don't use gendisk parameter at all. Remove the unused parameter and adjust the calls.
Signed-off-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a1d793e799825..dd18861f77c05 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1049,8 +1049,7 @@ EXPORT_SYMBOL_GPL(nvme_setup_cmd); * >0: nvme controller's cqe status response * <0: kernel error in lieu of controller response */ -static int nvme_execute_rq(struct gendisk *disk, struct request *rq, - bool at_head) +static int nvme_execute_rq(struct request *rq, bool at_head) { blk_status_t status; @@ -1090,7 +1089,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, goto out; } - ret = nvme_execute_rq(NULL, req, at_head); + ret = nvme_execute_rq(req, at_head); if (result && ret >= 0) *result = nvme_req(req)->result; out: @@ -1206,12 +1205,11 @@ int nvme_execute_passthru_rq(struct request *rq) struct nvme_command *cmd = nvme_req(rq)->cmd; struct nvme_ctrl *ctrl = nvme_req(rq)->ctrl; struct nvme_ns *ns = rq->q->queuedata; - struct gendisk *disk = ns ? ns->disk : NULL; u32 effects; int ret; effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode); - ret = nvme_execute_rq(disk, rq, false); + ret = nvme_execute_rq(rq, false); if (effects) /* nothing to be done for zero cmd effects */ nvme_passthru_end(ctrl, effects, cmd, ret); -- GitLab From 572c97355bdc0e7cdd5979304b8554712e26ceb8 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 11 Jan 2022 22:20:59 -0800 Subject: [PATCH 0788/1586] nvme-fabrics: use unsigned int type Loop variable i will never have a negative value, so use unsigned int type instead of int.
Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index f79a66d4e22ce..9f81a0562e3e9 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -873,7 +873,7 @@ static int nvmf_check_required_opts(struct nvmf_ctrl_options *opts, unsigned int required_opts) { if ((opts->mask & required_opts) != required_opts) { - int i; + unsigned int i; for (i = 0; i < ARRAY_SIZE(opt_tokens); i++) { if ((opt_tokens[i].token & required_opts) && -- GitLab From 0801a4b630ab0949ddf0fc3f26cb17976e2d3afb Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 11 Jan 2022 22:21:00 -0800 Subject: [PATCH 0789/1586] nvme-fabrics: use unsigned int type Loop variable i will never have a negative value, so use unsigned int type instead of int. Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 9f81a0562e3e9..131e78e62d00d 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -923,7 +923,7 @@ static int nvmf_check_allowed_opts(struct nvmf_ctrl_options *opts, unsigned int allowed_opts) { if (opts->mask & ~allowed_opts) { - int i; + unsigned int i; for (i = 0; i < ARRAY_SIZE(opt_tokens); i++) { if ((opt_tokens[i].token & opts->mask) && -- GitLab From 72b3eab456ba57bf933b25e1b2adea3e3eafd153 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 11 Jan 2022 22:20:57 -0800 Subject: [PATCH 0790/1586] nvme-fabrics: use consistent zeroout pattern Remove zeroout memset call & zeroout local variable cmd at the time of declaration in nvmf_reg_read32() similar to what we have done in nvmf_reg_read64(), nvmf_reg_write32(), nvmf_connect_admin_queue(), and nvmf_connect_io_queue().
Signed-off-by: Chaitanya Kulkarni Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 131e78e62d00d..f4aa401247209 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -144,11 +144,10 @@ EXPORT_SYMBOL_GPL(nvmf_get_address); */ int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val) { - struct nvme_command cmd; + struct nvme_command cmd = { }; union nvme_result res; int ret; - memset(&cmd, 0, sizeof(cmd)); cmd.prop_get.opcode = nvme_fabrics_command; cmd.prop_get.fctype = nvme_fabrics_type_property_get; cmd.prop_get.offset = cpu_to_le32(off); -- GitLab From 581f19dd72b9d148ce667b6dcb20d3ef16b9a1cf Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 11 Jan 2022 22:20:58 -0800 Subject: [PATCH 0791/1586] nvme-fabrics: remove unnecessary braces for case Braces are not required for enum value NVME_SC_CONNECT_INVALID_PARAM when used on the switch-case statement, remove the braces. 
Signed-off-by: Chaitanya Kulkarni Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index f4aa401247209..ee79a6d639b4b 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -271,7 +271,7 @@ static void nvmf_log_connect_error(struct nvme_ctrl *ctrl, int err_sctype = errval & ~NVME_SC_DNR; switch (err_sctype) { - case (NVME_SC_CONNECT_INVALID_PARAM): + case NVME_SC_CONNECT_INVALID_PARAM: if (offset >> 16) { char *inv_data = "Connect Invalid Data Parameter"; -- GitLab From 2caecd62ea5160803b25d96cb1a14ce755c2c259 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 2 Feb 2022 01:04:44 -0800 Subject: [PATCH 0792/1586] nvmet: use i_size_read() to set size for file-ns Instead of calling vfs_getattr() use i_size_read() to read the size of file so we can read the size of not only file type but also block type with one call. This is needed to implement buffered_io support for the NVMeOF block device backend. We also change return type of function nvmet_file_ns_revalidate() from int to void, since this function does not return any meaningful value.
Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/io-cmd-file.c | 17 ++++------------- drivers/nvme/target/nvmet.h | 2 +- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index 6be6e59d273bb..6485dc8eb9740 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -14,16 +14,9 @@ #define NVMET_MAX_MPOOL_BVEC 16 #define NVMET_MIN_MPOOL_OBJ 16 -int nvmet_file_ns_revalidate(struct nvmet_ns *ns) +void nvmet_file_ns_revalidate(struct nvmet_ns *ns) { - struct kstat stat; - int ret; - - ret = vfs_getattr(&ns->file->f_path, &stat, STATX_SIZE, - AT_STATX_FORCE_SYNC); - if (!ret) - ns->size = stat.size; - return ret; + ns->size = i_size_read(ns->file->f_mapping->host); } void nvmet_file_ns_disable(struct nvmet_ns *ns) @@ -43,7 +36,7 @@ void nvmet_file_ns_disable(struct nvmet_ns *ns) int nvmet_file_ns_enable(struct nvmet_ns *ns) { int flags = O_RDWR | O_LARGEFILE; - int ret; + int ret = 0; if (!ns->buffered_io) flags |= O_DIRECT; @@ -57,9 +50,7 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns) return ret; } - ret = nvmet_file_ns_revalidate(ns); - if (ret) - goto err; + nvmet_file_ns_revalidate(ns); /* * i_blkbits can be greater than the universally accepted upper bound, diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index af193423c10bb..69637bf8f8e19 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -541,7 +541,7 @@ u16 nvmet_bdev_flush(struct nvmet_req *req); u16 nvmet_file_flush(struct nvmet_req *req); void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid); void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns); -int nvmet_file_ns_revalidate(struct nvmet_ns *ns); +void nvmet_file_ns_revalidate(struct nvmet_ns *ns); void nvmet_ns_revalidate(struct nvmet_ns *ns); u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts); -- GitLab From 
6f6d604b4ef896793901f213db3b45eefa7288fa Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 2 Feb 2022 01:04:45 -0800 Subject: [PATCH 0793/1586] nvmet: allow bdev in buffered_io mode Allow block device to be configured in the buffered I/O mode by using the file backend. In this way now we can use cache for the block device namespace which shows significant performance improvement. We update the block device ns enable function and return early when buffered_io flag is set. Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/io-cmd-bdev.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 95c2bbb0b2f5f..e9194804ddee4 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -76,6 +76,14 @@ int nvmet_bdev_ns_enable(struct nvmet_ns *ns) { int ret; + /* + * When buffered_io namespace attribute is enabled that means user want + * this block device to be used as a file, so block device can take + * an advantage of cache. + */ + if (ns->buffered_io) + return -ENOTBLK; + ns->bdev = blkdev_get_by_path(ns->device_path, FMODE_READ | FMODE_WRITE, NULL); if (IS_ERR(ns->bdev)) { -- GitLab From 8b850475c08caa9545c460d7dc823d0a8c7eafd3 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 14 Feb 2022 11:07:27 +0200 Subject: [PATCH 0794/1586] nvme: replace ida_simple[get|remove] with the simler ida_[alloc|free] ida_simple_[get|remove] are wrappers anyways. Also, use ida_alloc_min with the ns_ida as namespace enumeration starts with 1. 
Signed-off-by: Sagi Grimberg Reviewed-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index dd18861f77c05..da0e19148177c 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -561,7 +561,7 @@ static void nvme_free_ns_head(struct kref *ref) container_of(ref, struct nvme_ns_head, ref); nvme_mpath_remove_disk(head); - ida_simple_remove(&head->subsys->ns_ida, head->instance); + ida_free(&head->subsys->ns_ida, head->instance); cleanup_srcu_struct(&head->srcu); nvme_put_subsystem(head->subsys); kfree(head); @@ -2565,7 +2565,7 @@ static void nvme_release_subsystem(struct device *dev) container_of(dev, struct nvme_subsystem, dev); if (subsys->instance >= 0) - ida_simple_remove(&nvme_instance_ida, subsys->instance); + ida_free(&nvme_instance_ida, subsys->instance); kfree(subsys); } @@ -3616,7 +3616,7 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys, static void nvme_cdev_rel(struct device *dev) { - ida_simple_remove(&nvme_ns_chr_minor_ida, MINOR(dev->devt)); + ida_free(&nvme_ns_chr_minor_ida, MINOR(dev->devt)); } void nvme_cdev_del(struct cdev *cdev, struct device *cdev_device) @@ -3630,7 +3630,7 @@ int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device, { int minor, ret; - minor = ida_simple_get(&nvme_ns_chr_minor_ida, 0, 0, GFP_KERNEL); + minor = ida_alloc(&nvme_ns_chr_minor_ida, GFP_KERNEL); if (minor < 0) return minor; cdev_device->devt = MKDEV(MAJOR(nvme_ns_chr_devt), minor); @@ -3693,7 +3693,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, head = kzalloc(size, GFP_KERNEL); if (!head) goto out; - ret = ida_simple_get(&ctrl->subsys->ns_ida, 1, 0, GFP_KERNEL); + ret = ida_alloc_min(&ctrl->subsys->ns_ida, 1, GFP_KERNEL); if (ret < 0) goto out_free_head; head->instance = ret; @@ -3732,7 +3732,7 @@ static struct 
nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, out_cleanup_srcu: cleanup_srcu_struct(&head->srcu); out_ida_remove: - ida_simple_remove(&ctrl->subsys->ns_ida, head->instance); + ida_free(&ctrl->subsys->ns_ida, head->instance); out_free_head: kfree(head); out: @@ -4429,7 +4429,7 @@ static void nvme_free_ctrl(struct device *dev) struct nvme_subsystem *subsys = ctrl->subsys; if (!subsys || ctrl->instance != subsys->instance) - ida_simple_remove(&nvme_instance_ida, ctrl->instance); + ida_free(&nvme_instance_ida, ctrl->instance); nvme_free_cels(ctrl); nvme_mpath_uninit(ctrl); @@ -4488,7 +4488,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, goto out; } - ret = ida_simple_get(&nvme_instance_ida, 0, 0, GFP_KERNEL); + ret = ida_alloc(&nvme_instance_ida, GFP_KERNEL); if (ret < 0) goto out; ctrl->instance = ret; @@ -4529,7 +4529,7 @@ out_free_name: nvme_put_ctrl(ctrl); kfree_const(ctrl->device->kobj.name); out_release_instance: - ida_simple_remove(&nvme_instance_ida, ctrl->instance); + ida_free(&nvme_instance_ida, ctrl->instance); out: if (ctrl->discard_page) __free_page(ctrl->discard_page); -- GitLab From 3dd83f4013f0e85d37c059c91015a2721209fe86 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 14 Feb 2022 11:07:28 +0200 Subject: [PATCH 0795/1586] nvme-fc: replace ida_simple[get|remove] with the simler ida_[alloc|free] ida_simple_[get|remove] are wrappers anyways. 
Signed-off-by: Sagi Grimberg Reviewed-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 71b3108c22f06..96a5d7fc36f7e 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -259,7 +259,7 @@ nvme_fc_free_lport(struct kref *ref) complete(&nvme_fc_unload_proceed); spin_unlock_irqrestore(&nvme_fc_lock, flags); - ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num); + ida_free(&nvme_fc_local_port_cnt, lport->localport.port_num); ida_destroy(&lport->endp_cnt); put_device(lport->dev); @@ -399,7 +399,7 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo, goto out_reghost_failed; } - idx = ida_simple_get(&nvme_fc_local_port_cnt, 0, 0, GFP_KERNEL); + idx = ida_alloc(&nvme_fc_local_port_cnt, GFP_KERNEL); if (idx < 0) { ret = -ENOSPC; goto out_fail_kfree; @@ -439,7 +439,7 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo, return 0; out_ida_put: - ida_simple_remove(&nvme_fc_local_port_cnt, idx); + ida_free(&nvme_fc_local_port_cnt, idx); out_fail_kfree: kfree(newrec); out_reghost_failed: @@ -535,7 +535,7 @@ nvme_fc_free_rport(struct kref *ref) spin_unlock_irqrestore(&nvme_fc_lock, flags); WARN_ON(!list_empty(&rport->disc_list)); - ida_simple_remove(&lport->endp_cnt, rport->remoteport.port_num); + ida_free(&lport->endp_cnt, rport->remoteport.port_num); kfree(rport); @@ -713,7 +713,7 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, goto out_lport_put; } - idx = ida_simple_get(&lport->endp_cnt, 0, 0, GFP_KERNEL); + idx = ida_alloc(&lport->endp_cnt, GFP_KERNEL); if (idx < 0) { ret = -ENOSPC; goto out_kfree_rport; @@ -2393,7 +2393,7 @@ nvme_fc_ctrl_free(struct kref *ref) put_device(ctrl->dev); nvme_fc_rport_put(ctrl->rport); - ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); + ida_free(&nvme_fc_ctrl_cnt, ctrl->cnum); if 
(ctrl->ctrl.opts) nvmf_free_options(ctrl->ctrl.opts); kfree(ctrl); @@ -3472,7 +3472,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, goto out_fail; } - idx = ida_simple_get(&nvme_fc_ctrl_cnt, 0, 0, GFP_KERNEL); + idx = ida_alloc(&nvme_fc_ctrl_cnt, GFP_KERNEL); if (idx < 0) { ret = -ENOSPC; goto out_free_ctrl; @@ -3635,7 +3635,7 @@ out_free_queues: kfree(ctrl->queues); out_free_ida: put_device(ctrl->dev); - ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); + ida_free(&nvme_fc_ctrl_cnt, ctrl->cnum); out_free_ctrl: kfree(ctrl); out_fail: -- GitLab From 22027a9811349de28f81e13e20e83299099acd3a Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 14 Feb 2022 11:07:29 +0200 Subject: [PATCH 0796/1586] nvmet: replace ida_simple[get|remove] with the simler ida_[alloc|free] ida_simple_[get|remove] are wrappers anyways. Signed-off-by: Sagi Grimberg Reviewed-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 5119c687de683..724a6d373340a 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1400,7 +1400,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, if (subsys->cntlid_min > subsys->cntlid_max) goto out_free_sqs; - ret = ida_simple_get(&cntlid_ida, + ret = ida_alloc_range(&cntlid_ida, subsys->cntlid_min, subsys->cntlid_max, GFP_KERNEL); if (ret < 0) { @@ -1459,7 +1459,7 @@ static void nvmet_ctrl_free(struct kref *ref) flush_work(&ctrl->async_event_work); cancel_work_sync(&ctrl->fatal_err_work); - ida_simple_remove(&cntlid_ida, ctrl->cntlid); + ida_free(&cntlid_ida, ctrl->cntlid); nvmet_async_events_free(ctrl); kfree(ctrl->sqs); -- GitLab From 6dd0f465d57c4522294d0fd42fde7603264d0871 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 14 Feb 2022 11:07:30 +0200 Subject: [PATCH 0797/1586] nvmet-fc: replace 
ida_simple[get|remove] with the simler ida_[alloc|free] ida_simple_[get|remove] are wrappers anyways. Signed-off-by: Sagi Grimberg Reviewed-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 22b5108168a6a..cb013516784c1 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1115,7 +1115,7 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) if (!assoc) return NULL; - idx = ida_simple_get(&tgtport->assoc_cnt, 0, 0, GFP_KERNEL); + idx = ida_alloc(&tgtport->assoc_cnt, GFP_KERNEL); if (idx < 0) goto out_free_assoc; @@ -1157,7 +1157,7 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) out_put: nvmet_fc_tgtport_put(tgtport); out_ida: - ida_simple_remove(&tgtport->assoc_cnt, idx); + ida_free(&tgtport->assoc_cnt, idx); out_free_assoc: kfree(assoc); return NULL; @@ -1183,7 +1183,7 @@ nvmet_fc_target_assoc_free(struct kref *ref) /* if pending Rcv Disconnect Association LS, send rsp now */ if (oldls) nvmet_fc_xmt_ls_rsp(tgtport, oldls); - ida_simple_remove(&tgtport->assoc_cnt, assoc->a_id); + ida_free(&tgtport->assoc_cnt, assoc->a_id); dev_info(tgtport->dev, "{%d:%d} Association freed\n", tgtport->fc_target_port.port_num, assoc->a_id); @@ -1383,7 +1383,7 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, goto out_regtgt_failed; } - idx = ida_simple_get(&nvmet_fc_tgtport_cnt, 0, 0, GFP_KERNEL); + idx = ida_alloc(&nvmet_fc_tgtport_cnt, GFP_KERNEL); if (idx < 0) { ret = -ENOSPC; goto out_fail_kfree; @@ -1433,7 +1433,7 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, out_free_newrec: put_device(dev); out_ida_put: - ida_simple_remove(&nvmet_fc_tgtport_cnt, idx); + ida_free(&nvmet_fc_tgtport_cnt, idx); out_fail_kfree: kfree(newrec); out_regtgt_failed: @@ -1460,7 +1460,7 @@ 
nvmet_fc_free_tgtport(struct kref *ref) /* let the LLDD know we've finished tearing it down */ tgtport->ops->targetport_delete(&tgtport->fc_target_port); - ida_simple_remove(&nvmet_fc_tgtport_cnt, + ida_free(&nvmet_fc_tgtport_cnt, tgtport->fc_target_port.port_num); ida_destroy(&tgtport->assoc_cnt); -- GitLab From 7c2566394f8ceba4b99a373c1bc2411750301587 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 14 Feb 2022 11:07:31 +0200 Subject: [PATCH 0798/1586] nvmet-rdma: replace ida_simple[get|remove] with the simler ida_[alloc|free] ida_simple_[get|remove] are wrappers anyways. Signed-off-by: Sagi Grimberg Reviewed-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/rdma.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 1deb4043e2425..00656754e3314 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1356,7 +1356,7 @@ static void nvmet_rdma_free_queue(struct nvmet_rdma_queue *queue) !queue->host_qid); } nvmet_rdma_free_rsps(queue); - ida_simple_remove(&nvmet_rdma_queue_ida, queue->idx); + ida_free(&nvmet_rdma_queue_ida, queue->idx); kfree(queue); } @@ -1459,7 +1459,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev, spin_lock_init(&queue->rsps_lock); INIT_LIST_HEAD(&queue->queue_list); - queue->idx = ida_simple_get(&nvmet_rdma_queue_ida, 0, 0, GFP_KERNEL); + queue->idx = ida_alloc(&nvmet_rdma_queue_ida, GFP_KERNEL); if (queue->idx < 0) { ret = NVME_RDMA_CM_NO_RSC; goto out_destroy_sq; @@ -1510,7 +1510,7 @@ out_free_cmds: out_free_responses: nvmet_rdma_free_rsps(queue); out_ida_remove: - ida_simple_remove(&nvmet_rdma_queue_ida, queue->idx); + ida_free(&nvmet_rdma_queue_ida, queue->idx); out_destroy_sq: nvmet_sq_destroy(&queue->nvme_sq); out_free_queue: -- GitLab From 44f331a630bdc7c61de9c6760c4eec0133ee9f04 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 14 Feb 2022 11:07:32 +0200 
Subject: [PATCH 0799/1586] nvmet-tcp: replace ida_simple[get|remove] with the simler ida_[alloc|free] ida_simple_[get|remove] are wrappers anyways. Signed-off-by: Sagi Grimberg Reviewed-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 7c1c43ce466bc..83ca577f72be2 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1473,7 +1473,7 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w) nvmet_tcp_free_cmds(queue); if (queue->hdr_digest || queue->data_digest) nvmet_tcp_free_crypto(queue); - ida_simple_remove(&nvmet_tcp_queue_ida, queue->idx); + ida_free(&nvmet_tcp_queue_ida, queue->idx); page = virt_to_head_page(queue->pf_cache.va); __page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias); @@ -1613,7 +1613,7 @@ static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port, init_llist_head(&queue->resp_list); INIT_LIST_HEAD(&queue->resp_send_list); - queue->idx = ida_simple_get(&nvmet_tcp_queue_ida, 0, 0, GFP_KERNEL); + queue->idx = ida_alloc(&nvmet_tcp_queue_ida, GFP_KERNEL); if (queue->idx < 0) { ret = queue->idx; goto out_free_queue; @@ -1646,7 +1646,7 @@ out_destroy_sq: out_free_connect: nvmet_tcp_free_cmd(&queue->connect); out_ida_remove: - ida_simple_remove(&nvmet_tcp_queue_ida, queue->idx); + ida_free(&nvmet_tcp_queue_ida, queue->idx); out_free_queue: kfree(queue); return ret; -- GitLab From 4686af885a9168f9ec70c4063616640911c48b03 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Wed, 9 Feb 2022 10:54:49 +0200 Subject: [PATCH 0800/1586] nvme-rdma: add helpers for mapping/unmapping request Introduce nvme_rdma_dma_map_req/nvme_rdma_dma_unmap_req helper functions to improve code readability and ease on the error flow. 
Reviewed-by: Israel Rukshin Signed-off-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 111 +++++++++++++++++++++++---------------- 1 file changed, 65 insertions(+), 46 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 850f84d204d05..14ec2c85db060 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1282,6 +1282,22 @@ static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue, return ib_post_send(queue->qp, &wr, NULL); } +static void nvme_rdma_dma_unmap_req(struct ib_device *ibdev, struct request *rq) +{ + struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); + + if (blk_integrity_rq(rq)) { + ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl, + req->metadata_sgl->nents, rq_dma_dir(rq)); + sg_free_table_chained(&req->metadata_sgl->sg_table, + NVME_INLINE_METADATA_SG_CNT); + } + + ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents, + rq_dma_dir(rq)); + sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT); +} + static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue, struct request *rq) { @@ -1293,13 +1309,6 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue, if (!blk_rq_nr_phys_segments(rq)) return; - if (blk_integrity_rq(rq)) { - ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl, - req->metadata_sgl->nents, rq_dma_dir(rq)); - sg_free_table_chained(&req->metadata_sgl->sg_table, - NVME_INLINE_METADATA_SG_CNT); - } - if (req->use_sig_mr) pool = &queue->qp->sig_mrs; @@ -1308,9 +1317,7 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue, req->mr = NULL; } - ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents, - rq_dma_dir(rq)); - sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT); + nvme_rdma_dma_unmap_req(ibdev, rq); } static int nvme_rdma_set_sg_null(struct nvme_command *c) @@ -1521,22 +1528,11 @@ mr_put: return -EINVAL; } -static int nvme_rdma_map_data(struct 
nvme_rdma_queue *queue, - struct request *rq, struct nvme_command *c) +static int nvme_rdma_dma_map_req(struct ib_device *ibdev, struct request *rq, + int *count, int *pi_count) { struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); - struct nvme_rdma_device *dev = queue->device; - struct ib_device *ibdev = dev->dev; - int pi_count = 0; - int count, ret; - - req->num_sge = 1; - refcount_set(&req->ref, 2); /* send and recv completions */ - - c->common.flags |= NVME_CMD_SGL_METABUF; - - if (!blk_rq_nr_phys_segments(rq)) - return nvme_rdma_set_sg_null(c); + int ret; req->data_sgl.sg_table.sgl = (struct scatterlist *)(req + 1); ret = sg_alloc_table_chained(&req->data_sgl.sg_table, @@ -1548,9 +1544,9 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, req->data_sgl.nents = blk_rq_map_sg(rq->q, rq, req->data_sgl.sg_table.sgl); - count = ib_dma_map_sg(ibdev, req->data_sgl.sg_table.sgl, - req->data_sgl.nents, rq_dma_dir(rq)); - if (unlikely(count <= 0)) { + *count = ib_dma_map_sg(ibdev, req->data_sgl.sg_table.sgl, + req->data_sgl.nents, rq_dma_dir(rq)); + if (unlikely(*count <= 0)) { ret = -EIO; goto out_free_table; } @@ -1569,16 +1565,50 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, req->metadata_sgl->nents = blk_rq_map_integrity_sg(rq->q, rq->bio, req->metadata_sgl->sg_table.sgl); - pi_count = ib_dma_map_sg(ibdev, - req->metadata_sgl->sg_table.sgl, - req->metadata_sgl->nents, - rq_dma_dir(rq)); - if (unlikely(pi_count <= 0)) { + *pi_count = ib_dma_map_sg(ibdev, + req->metadata_sgl->sg_table.sgl, + req->metadata_sgl->nents, + rq_dma_dir(rq)); + if (unlikely(*pi_count <= 0)) { ret = -EIO; goto out_free_pi_table; } } + return 0; + +out_free_pi_table: + sg_free_table_chained(&req->metadata_sgl->sg_table, + NVME_INLINE_METADATA_SG_CNT); +out_unmap_sg: + ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents, + rq_dma_dir(rq)); +out_free_table: + sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT); + return ret; +} 
+ +static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, + struct request *rq, struct nvme_command *c) +{ + struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); + struct nvme_rdma_device *dev = queue->device; + struct ib_device *ibdev = dev->dev; + int pi_count = 0; + int count, ret; + + req->num_sge = 1; + refcount_set(&req->ref, 2); /* send and recv completions */ + + c->common.flags |= NVME_CMD_SGL_METABUF; + + if (!blk_rq_nr_phys_segments(rq)) + return nvme_rdma_set_sg_null(c); + + ret = nvme_rdma_dma_map_req(ibdev, rq, &count, &pi_count); + if (unlikely(ret)) + return ret; + if (req->use_sig_mr) { ret = nvme_rdma_map_sg_pi(queue, req, c, count, pi_count); goto out; @@ -1602,23 +1632,12 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, ret = nvme_rdma_map_sg_fr(queue, req, c, count); out: if (unlikely(ret)) - goto out_unmap_pi_sg; + goto out_dma_unmap_req; return 0; -out_unmap_pi_sg: - if (blk_integrity_rq(rq)) - ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl, - req->metadata_sgl->nents, rq_dma_dir(rq)); -out_free_pi_table: - if (blk_integrity_rq(rq)) - sg_free_table_chained(&req->metadata_sgl->sg_table, - NVME_INLINE_METADATA_SG_CNT); -out_unmap_sg: - ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents, - rq_dma_dir(rq)); -out_free_table: - sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT); +out_dma_unmap_req: + nvme_rdma_dma_unmap_req(ibdev, rq); return ret; } -- GitLab From 72e8b5cd7dd387ba3eee81b0a59746ad8ccdcb5f Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Thu, 10 Feb 2022 11:12:36 -0800 Subject: [PATCH 0801/1586] nvme: add a helper to initialize connect_q Add and use helper to remove duplicate code for fabrics connect_q initialization and error handling for all the transports. 
Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 6 ++---- drivers/nvme/host/nvme.h | 8 ++++++++ drivers/nvme/host/rdma.c | 6 ++---- drivers/nvme/host/tcp.c | 6 ++---- drivers/nvme/target/loop.c | 6 ++---- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 96a5d7fc36f7e..080f85f4105f3 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2916,11 +2916,9 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) ctrl->ctrl.tagset = &ctrl->tag_set; - ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set); - if (IS_ERR(ctrl->ctrl.connect_q)) { - ret = PTR_ERR(ctrl->ctrl.connect_q); + ret = nvme_ctrl_init_connect_q(&(ctrl->ctrl)); + if (ret) goto out_free_tag_set; - } ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1); if (ret) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index a162f6c6da6e1..f8658f984d649 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -894,6 +894,14 @@ static inline int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf) } #endif +static inline int nvme_ctrl_init_connect_q(struct nvme_ctrl *ctrl) +{ + ctrl->connect_q = blk_mq_init_queue(ctrl->tagset); + if (IS_ERR(ctrl->connect_q)) + return PTR_ERR(ctrl->connect_q); + return 0; +} + static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) { return dev_to_disk(dev)->private_data; diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 14ec2c85db060..c49b9c3c46f23 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -978,11 +978,9 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) goto out_free_io_queues; } - ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set); - if (IS_ERR(ctrl->ctrl.connect_q)) { - ret = PTR_ERR(ctrl->ctrl.connect_q); + ret = nvme_ctrl_init_connect_q(&(ctrl->ctrl)); + if (ret) goto out_free_tag_set; - } } ret = 
nvme_rdma_start_io_queues(ctrl); diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 4ceb28675fdf6..6cbcc8b4daaf9 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1825,11 +1825,9 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) goto out_free_io_queues; } - ctrl->connect_q = blk_mq_init_queue(ctrl->tagset); - if (IS_ERR(ctrl->connect_q)) { - ret = PTR_ERR(ctrl->connect_q); + ret = nvme_ctrl_init_connect_q(ctrl); + if (ret) goto out_free_tag_set; - } } ret = nvme_tcp_start_io_queues(ctrl); diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index eb1094254c823..23f9d6f888042 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -543,11 +543,9 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) if (ret) goto out_destroy_queues; - ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set); - if (IS_ERR(ctrl->ctrl.connect_q)) { - ret = PTR_ERR(ctrl->ctrl.connect_q); + ret = nvme_ctrl_init_connect_q(&(ctrl->ctrl)); + if (ret) goto out_free_tagset; - } ret = nvme_loop_connect_io_queues(ctrl); if (ret) -- GitLab From bd83fe6f2cd2133beaac7c423fd36c3515048fc8 Mon Sep 17 00:00:00 2001 From: Alan Adamson Date: Thu, 3 Feb 2022 00:11:53 -0800 Subject: [PATCH 0802/1586] nvme: add verbose error logging Improves logging of NVMe errors. If NVME_VERBOSE_ERRORS is configured, a verbose description of the error is logged; otherwise only the status codes/bits are logged. Signed-off-by: Chaitanya Kulkarni [kch]: fix several nits, cosmetics, and trim down code. Signed-off-by: Martin K.
Petersen Signed-off-by: Alan Adamson Reviewed-by: Himanshu Madhani Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/Kconfig | 8 ++ drivers/nvme/host/Makefile | 2 +- drivers/nvme/host/constants.c | 185 ++++++++++++++++++++++++++++++++++ drivers/nvme/host/core.c | 33 ++++++ drivers/nvme/host/nvme.h | 19 ++++ include/linux/nvme.h | 1 + 6 files changed, 247 insertions(+), 1 deletion(-) create mode 100644 drivers/nvme/host/constants.c diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index dc0450ca23a30..d6d056963c062 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -24,6 +24,14 @@ config NVME_MULTIPATH /dev/nvmeXnY device will show up for each NVMe namespace, even if it is accessible through multiple controllers. +config NVME_VERBOSE_ERRORS + bool "NVMe verbose error reporting" + depends on NVME_CORE + help + This option enables verbose reporting for NVMe errors. The + error translation table will grow the kernel image size by + about 4 KB. 
+ config NVME_HWMON bool "NVMe hardware monitoring" depends on (NVME_CORE=y && HWMON=y) || (NVME_CORE=m && HWMON) diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile index dfaacd472e5dd..476c5c9884961 100644 --- a/drivers/nvme/host/Makefile +++ b/drivers/nvme/host/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_NVME_RDMA) += nvme-rdma.o obj-$(CONFIG_NVME_FC) += nvme-fc.o obj-$(CONFIG_NVME_TCP) += nvme-tcp.o -nvme-core-y := core.o ioctl.o +nvme-core-y := core.o ioctl.o constants.o nvme-core-$(CONFIG_TRACING) += trace.o nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o nvme-core-$(CONFIG_BLK_DEV_ZONED) += zns.o diff --git a/drivers/nvme/host/constants.c b/drivers/nvme/host/constants.c new file mode 100644 index 0000000000000..7d49eb34b348e --- /dev/null +++ b/drivers/nvme/host/constants.c @@ -0,0 +1,185 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * NVM Express device driver verbose errors + * Copyright (c) 2022, Oracle and/or its affiliates + */ + +#include +#include "nvme.h" + +#ifdef CONFIG_NVME_VERBOSE_ERRORS +static const char * const nvme_ops[] = { + [nvme_cmd_flush] = "Flush", + [nvme_cmd_write] = "Write", + [nvme_cmd_read] = "Read", + [nvme_cmd_write_uncor] = "Write Uncorrectable", + [nvme_cmd_compare] = "Compare", + [nvme_cmd_write_zeroes] = "Write Zeros", + [nvme_cmd_dsm] = "Dataset Management", + [nvme_cmd_verify] = "Verify", + [nvme_cmd_resv_register] = "Reservation Register", + [nvme_cmd_resv_report] = "Reservation Report", + [nvme_cmd_resv_acquire] = "Reservation Acquire", + [nvme_cmd_resv_release] = "Reservation Release", + [nvme_cmd_zone_mgmt_send] = "Zone Management Send", + [nvme_cmd_zone_mgmt_recv] = "Zone Management Receive", + [nvme_cmd_zone_append] = "Zone Management Append", +}; + +static const char * const nvme_admin_ops[] = { + [nvme_admin_delete_sq] = "Delete SQ", + [nvme_admin_create_sq] = "Create SQ", + [nvme_admin_get_log_page] = "Get Log Page", + [nvme_admin_delete_cq] = "Delete CQ", + [nvme_admin_create_cq] = "Create CQ", + 
[nvme_admin_identify] = "Identify", + [nvme_admin_abort_cmd] = "Abort Command", + [nvme_admin_set_features] = "Set Features", + [nvme_admin_get_features] = "Get Features", + [nvme_admin_async_event] = "Async Event", + [nvme_admin_ns_mgmt] = "Namespace Management", + [nvme_admin_activate_fw] = "Activate Firmware", + [nvme_admin_download_fw] = "Download Firmware", + [nvme_admin_dev_self_test] = "Device Self Test", + [nvme_admin_ns_attach] = "Namespace Attach", + [nvme_admin_keep_alive] = "Keep Alive", + [nvme_admin_directive_send] = "Directive Send", + [nvme_admin_directive_recv] = "Directive Receive", + [nvme_admin_virtual_mgmt] = "Virtual Management", + [nvme_admin_nvme_mi_send] = "NVMe Send MI", + [nvme_admin_nvme_mi_recv] = "NVMe Receive MI", + [nvme_admin_dbbuf] = "Doorbell Buffer Config", + [nvme_admin_format_nvm] = "Format NVM", + [nvme_admin_security_send] = "Security Send", + [nvme_admin_security_recv] = "Security Receive", + [nvme_admin_sanitize_nvm] = "Sanitize NVM", + [nvme_admin_get_lba_status] = "Get LBA Status", +}; + +static const char * const nvme_statuses[] = { + [NVME_SC_SUCCESS] = "Success", + [NVME_SC_INVALID_OPCODE] = "Invalid Command Opcode", + [NVME_SC_INVALID_FIELD] = "Invalid Field in Command", + [NVME_SC_CMDID_CONFLICT] = "Command ID Conflict", + [NVME_SC_DATA_XFER_ERROR] = "Data Transfer Error", + [NVME_SC_POWER_LOSS] = "Commands Aborted due to Power Loss Notification", + [NVME_SC_INTERNAL] = "Internal Error", + [NVME_SC_ABORT_REQ] = "Command Abort Requested", + [NVME_SC_ABORT_QUEUE] = "Command Aborted due to SQ Deletion", + [NVME_SC_FUSED_FAIL] = "Command Aborted due to Failed Fused Command", + [NVME_SC_FUSED_MISSING] = "Command Aborted due to Missing Fused Command", + [NVME_SC_INVALID_NS] = "Invalid Namespace or Format", + [NVME_SC_CMD_SEQ_ERROR] = "Command Sequence Error", + [NVME_SC_SGL_INVALID_LAST] = "Invalid SGL Segment Descriptor", + [NVME_SC_SGL_INVALID_COUNT] = "Invalid Number of SGL Descriptors", + [NVME_SC_SGL_INVALID_DATA] = 
"Data SGL Length Invalid", + [NVME_SC_SGL_INVALID_METADATA] = "Metadata SGL Length Invalid", + [NVME_SC_SGL_INVALID_TYPE] = "SGL Descriptor Type Invalid", + [NVME_SC_CMB_INVALID_USE] = "Invalid Use of Controller Memory Buffer", + [NVME_SC_PRP_INVALID_OFFSET] = "PRP Offset Invalid", + [NVME_SC_ATOMIC_WU_EXCEEDED] = "Atomic Write Unit Exceeded", + [NVME_SC_OP_DENIED] = "Operation Denied", + [NVME_SC_SGL_INVALID_OFFSET] = "SGL Offset Invalid", + [NVME_SC_RESERVED] = "Reserved", + [NVME_SC_HOST_ID_INCONSIST] = "Host Identifier Inconsistent Format", + [NVME_SC_KA_TIMEOUT_EXPIRED] = "Keep Alive Timeout Expired", + [NVME_SC_KA_TIMEOUT_INVALID] = "Keep Alive Timeout Invalid", + [NVME_SC_ABORTED_PREEMPT_ABORT] = "Command Aborted due to Preempt and Abort", + [NVME_SC_SANITIZE_FAILED] = "Sanitize Failed", + [NVME_SC_SANITIZE_IN_PROGRESS] = "Sanitize In Progress", + [NVME_SC_SGL_INVALID_GRANULARITY] = "SGL Data Block Granularity Invalid", + [NVME_SC_CMD_NOT_SUP_CMB_QUEUE] = "Command Not Supported for Queue in CMB", + [NVME_SC_NS_WRITE_PROTECTED] = "Namespace is Write Protected", + [NVME_SC_CMD_INTERRUPTED] = "Command Interrupted", + [NVME_SC_TRANSIENT_TR_ERR] = "Transient Transport Error", + [NVME_SC_INVALID_IO_CMD_SET] = "Invalid IO Command Set", + [NVME_SC_LBA_RANGE] = "LBA Out of Range", + [NVME_SC_CAP_EXCEEDED] = "Capacity Exceeded", + [NVME_SC_NS_NOT_READY] = "Namespace Not Ready", + [NVME_SC_RESERVATION_CONFLICT] = "Reservation Conflict", + [NVME_SC_FORMAT_IN_PROGRESS] = "Format In Progress", + [NVME_SC_CQ_INVALID] = "Completion Queue Invalid", + [NVME_SC_QID_INVALID] = "Invalid Queue Identifier", + [NVME_SC_QUEUE_SIZE] = "Invalid Queue Size", + [NVME_SC_ABORT_LIMIT] = "Abort Command Limit Exceeded", + [NVME_SC_ABORT_MISSING] = "Reserved", /* XXX */ + [NVME_SC_ASYNC_LIMIT] = "Asynchronous Event Request Limit Exceeded", + [NVME_SC_FIRMWARE_SLOT] = "Invalid Firmware Slot", + [NVME_SC_FIRMWARE_IMAGE] = "Invalid Firmware Image", + [NVME_SC_INVALID_VECTOR] = "Invalid 
Interrupt Vector", + [NVME_SC_INVALID_LOG_PAGE] = "Invalid Log Page", + [NVME_SC_INVALID_FORMAT] = "Invalid Format", + [NVME_SC_FW_NEEDS_CONV_RESET] = "Firmware Activation Requires Conventional Reset", + [NVME_SC_INVALID_QUEUE] = "Invalid Queue Deletion", + [NVME_SC_FEATURE_NOT_SAVEABLE] = "Feature Identifier Not Saveable", + [NVME_SC_FEATURE_NOT_CHANGEABLE] = "Feature Not Changeable", + [NVME_SC_FEATURE_NOT_PER_NS] = "Feature Not Namespace Specific", + [NVME_SC_FW_NEEDS_SUBSYS_RESET] = "Firmware Activation Requires NVM Subsystem Reset", + [NVME_SC_FW_NEEDS_RESET] = "Firmware Activation Requires Reset", + [NVME_SC_FW_NEEDS_MAX_TIME] = "Firmware Activation Requires Maximum Time Violation", + [NVME_SC_FW_ACTIVATE_PROHIBITED] = "Firmware Activation Prohibited", + [NVME_SC_OVERLAPPING_RANGE] = "Overlapping Range", + [NVME_SC_NS_INSUFFICIENT_CAP] = "Namespace Insufficient Capacity", + [NVME_SC_NS_ID_UNAVAILABLE] = "Namespace Identifier Unavailable", + [NVME_SC_NS_ALREADY_ATTACHED] = "Namespace Already Attached", + [NVME_SC_NS_IS_PRIVATE] = "Namespace Is Private", + [NVME_SC_NS_NOT_ATTACHED] = "Namespace Not Attached", + [NVME_SC_THIN_PROV_NOT_SUPP] = "Thin Provisioning Not Supported", + [NVME_SC_CTRL_LIST_INVALID] = "Controller List Invalid", + [NVME_SC_SELT_TEST_IN_PROGRESS] = "Device Self-test In Progress", + [NVME_SC_BP_WRITE_PROHIBITED] = "Boot Partition Write Prohibited", + [NVME_SC_CTRL_ID_INVALID] = "Invalid Controller Identifier", + [NVME_SC_SEC_CTRL_STATE_INVALID] = "Invalid Secondary Controller State", + [NVME_SC_CTRL_RES_NUM_INVALID] = "Invalid Number of Controller Resources", + [NVME_SC_RES_ID_INVALID] = "Invalid Resource Identifier", + [NVME_SC_PMR_SAN_PROHIBITED] = "Sanitize Prohibited", + [NVME_SC_ANA_GROUP_ID_INVALID] = "ANA Group Identifier Invalid", + [NVME_SC_ANA_ATTACH_FAILED] = "ANA Attach Failed", + [NVME_SC_BAD_ATTRIBUTES] = "Conflicting Attributes", + [NVME_SC_INVALID_PI] = "Invalid Protection Information", + [NVME_SC_READ_ONLY] = "Attempted 
Write to Read Only Range", + [NVME_SC_ONCS_NOT_SUPPORTED] = "ONCS Not Supported", + [NVME_SC_ZONE_BOUNDARY_ERROR] = "Zoned Boundary Error", + [NVME_SC_ZONE_FULL] = "Zone Is Full", + [NVME_SC_ZONE_READ_ONLY] = "Zone Is Read Only", + [NVME_SC_ZONE_OFFLINE] = "Zone Is Offline", + [NVME_SC_ZONE_INVALID_WRITE] = "Zone Invalid Write", + [NVME_SC_ZONE_TOO_MANY_ACTIVE] = "Too Many Active Zones", + [NVME_SC_ZONE_TOO_MANY_OPEN] = "Too Many Open Zones", + [NVME_SC_ZONE_INVALID_TRANSITION] = "Invalid Zone State Transition", + [NVME_SC_WRITE_FAULT] = "Write Fault", + [NVME_SC_READ_ERROR] = "Unrecovered Read Error", + [NVME_SC_GUARD_CHECK] = "End-to-end Guard Check Error", + [NVME_SC_APPTAG_CHECK] = "End-to-end Application Tag Check Error", + [NVME_SC_REFTAG_CHECK] = "End-to-end Reference Tag Check Error", + [NVME_SC_COMPARE_FAILED] = "Compare Failure", + [NVME_SC_ACCESS_DENIED] = "Access Denied", + [NVME_SC_UNWRITTEN_BLOCK] = "Deallocated or Unwritten Logical Block", + [NVME_SC_ANA_PERSISTENT_LOSS] = "Asymmetric Access Persistent Loss", + [NVME_SC_ANA_INACCESSIBLE] = "Asymmetric Access Inaccessible", + [NVME_SC_ANA_TRANSITION] = "Asymmetric Access Transition", + [NVME_SC_HOST_PATH_ERROR] = "Host Pathing Error", +}; + +const unsigned char *nvme_get_error_status_str(u16 status) +{ + status &= 0x7ff; + if (status < ARRAY_SIZE(nvme_statuses) && nvme_statuses[status]) + return nvme_statuses[status & 0x7ff]; + return "Unknown"; +} + +const unsigned char *nvme_get_opcode_str(u8 opcode) +{ + if (opcode < ARRAY_SIZE(nvme_ops) && nvme_ops[opcode]) + return nvme_ops[opcode]; + return "Unknown"; +} + +const unsigned char *nvme_get_admin_opcode_str(u8 opcode) +{ + if (opcode < ARRAY_SIZE(nvme_admin_ops) && nvme_admin_ops[opcode]) + return nvme_admin_ops[opcode]; + return "Unknown"; +} +#endif /* CONFIG_NVME_VERBOSE_ERRORS */ diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index da0e19148177c..5e55732f3ea1e 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c 
@@ -299,6 +299,37 @@ static void nvme_retry_req(struct request *req) blk_mq_delay_kick_requeue_list(req->q, delay); } +static void nvme_log_error(struct request *req) +{ + struct nvme_ns *ns = req->q->queuedata; + struct nvme_request *nr = nvme_req(req); + + if (ns) { + pr_err_ratelimited("%s: %s(0x%x) @ LBA %llu, %llu blocks, %s (sct 0x%x / sc 0x%x) %s%s\n", + ns->disk ? ns->disk->disk_name : "?", + nvme_get_opcode_str(nr->cmd->common.opcode), + nr->cmd->common.opcode, + (unsigned long long)nvme_sect_to_lba(ns, blk_rq_pos(req)), + (unsigned long long)blk_rq_bytes(req) >> ns->lba_shift, + nvme_get_error_status_str(nr->status), + nr->status >> 8 & 7, /* Status Code Type */ + nr->status & 0xff, /* Status Code */ + nr->status & NVME_SC_MORE ? "MORE " : "", + nr->status & NVME_SC_DNR ? "DNR " : ""); + return; + } + + pr_err_ratelimited("%s: %s(0x%x), %s (sct 0x%x / sc 0x%x) %s%s\n", + dev_name(nr->ctrl->device), + nvme_get_admin_opcode_str(nr->cmd->common.opcode), + nr->cmd->common.opcode, + nvme_get_error_status_str(nr->status), + nr->status >> 8 & 7, /* Status Code Type */ + nr->status & 0xff, /* Status Code */ + nr->status & NVME_SC_MORE ? "MORE " : "", + nr->status & NVME_SC_DNR ? 
"DNR " : ""); +} + enum nvme_disposition { COMPLETE, RETRY, @@ -339,6 +370,8 @@ static inline void nvme_end_req(struct request *req) { blk_status_t status = nvme_error_status(nvme_req(req)->status); + if (unlikely(nvme_req(req)->status != NVME_SC_SUCCESS)) + nvme_log_error(req); nvme_end_req_zoned(req); nvme_trace_bio_complete(req); blk_mq_end_request(req, status); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index f8658f984d649..08893c04ca898 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -938,4 +938,23 @@ static inline bool nvme_multi_css(struct nvme_ctrl *ctrl) return (ctrl->ctrl_config & NVME_CC_CSS_MASK) == NVME_CC_CSS_CSI; } +#ifdef CONFIG_NVME_VERBOSE_ERRORS +const unsigned char *nvme_get_error_status_str(u16 status); +const unsigned char *nvme_get_opcode_str(u8 opcode); +const unsigned char *nvme_get_admin_opcode_str(u8 opcode); +#else /* CONFIG_NVME_VERBOSE_ERRORS */ +static inline const unsigned char *nvme_get_error_status_str(u16 status) +{ + return "I/O Error"; +} +static inline const unsigned char *nvme_get_opcode_str(u8 opcode) +{ + return "I/O Cmd"; +} +static inline const unsigned char *nvme_get_admin_opcode_str(u8 opcode) +{ + return "Admin Cmd"; +} +#endif /* CONFIG_NVME_VERBOSE_ERRORS */ + #endif /* _NVME_H */ diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 855dd9b3e84be..1f946e5bf7c12 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1636,6 +1636,7 @@ enum { NVME_SC_HOST_ABORTED_CMD = 0x371, NVME_SC_CRD = 0x1800, + NVME_SC_MORE = 0x2000, NVME_SC_DNR = 0x4000, }; -- GitLab From 89377bc1975c2993bde4a498a3a4e5817ac0ae2c Mon Sep 17 00:00:00 2001 From: Kanchan Joshi Date: Thu, 10 Feb 2022 11:07:55 +0530 Subject: [PATCH 0803/1586] nvme: add vectored-io support for user-passthrough Add a new NVME_IOCTL_IO64_CMD_VEC ioctl that works like the existing NVME_IOCTL_IO64_CMD ioctl except that it takes an array of iovecs and thus supports vectored I/O.
- cmd.addr is base address of user iovec array - cmd.vec_cnt is count of iovec array elements This patch does not include vectored-variant for admin-commands as most of them are light on buffers and likely to have low invocation frequency. Signed-off-by: Kanchan Joshi Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/ioctl.c | 35 ++++++++++++++++++++++++--------- include/uapi/linux/nvme_ioctl.h | 6 +++++- 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 22314962842da..aaf3dfad26574 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -56,7 +56,7 @@ out: static int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, void __user *ubuffer, unsigned bufflen, void __user *meta_buffer, unsigned meta_len, - u32 meta_seed, u64 *result, unsigned timeout) + u32 meta_seed, u64 *result, unsigned timeout, bool vec) { bool write = nvme_is_write(cmd); struct nvme_ns *ns = q->queuedata; @@ -75,8 +75,22 @@ static int nvme_submit_user_cmd(struct request_queue *q, nvme_req(req)->flags |= NVME_REQ_USERCMD; if (ubuffer && bufflen) { - ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, + if (!vec) + ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, GFP_KERNEL); + else { + struct iovec fast_iov[UIO_FASTIOV]; + struct iovec *iov = fast_iov; + struct iov_iter iter; + + ret = import_iovec(rq_data_dir(req), ubuffer, bufflen, + UIO_FASTIOV, &iov, &iter); + if (ret < 0) + goto out; + ret = blk_rq_map_user_iov(q, req, NULL, &iter, + GFP_KERNEL); + kfree(iov); + } if (ret) goto out; bio = req->bio; @@ -170,7 +184,8 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) return nvme_submit_user_cmd(ns->queue, &c, nvme_to_user_ptr(io.addr), length, - metadata, meta_len, lower_32_bits(io.slba), NULL, 0); + metadata, meta_len, lower_32_bits(io.slba), NULL, 0, + false); } static bool nvme_validate_passthru_nsid(struct 
nvme_ctrl *ctrl, @@ -224,7 +239,7 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, nvme_to_user_ptr(cmd.addr), cmd.data_len, nvme_to_user_ptr(cmd.metadata), cmd.metadata_len, - 0, &result, timeout); + 0, &result, timeout, false); if (status >= 0) { if (put_user(result, &ucmd->result)) @@ -235,7 +250,7 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, } static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, - struct nvme_passthru_cmd64 __user *ucmd) + struct nvme_passthru_cmd64 __user *ucmd, bool vec) { struct nvme_passthru_cmd64 cmd; struct nvme_command c; @@ -270,7 +285,7 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, nvme_to_user_ptr(cmd.addr), cmd.data_len, nvme_to_user_ptr(cmd.metadata), cmd.metadata_len, - 0, &cmd.result, timeout); + 0, &cmd.result, timeout, vec); if (status >= 0) { if (put_user(cmd.result, &ucmd->result)) @@ -296,7 +311,7 @@ static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd, case NVME_IOCTL_ADMIN_CMD: return nvme_user_cmd(ctrl, NULL, argp); case NVME_IOCTL_ADMIN64_CMD: - return nvme_user_cmd64(ctrl, NULL, argp); + return nvme_user_cmd64(ctrl, NULL, argp, false); default: return sed_ioctl(ctrl->opal_dev, cmd, argp); } @@ -340,7 +355,9 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, case NVME_IOCTL_SUBMIT_IO: return nvme_submit_io(ns, argp); case NVME_IOCTL_IO64_CMD: - return nvme_user_cmd64(ns->ctrl, ns, argp); + return nvme_user_cmd64(ns->ctrl, ns, argp, false); + case NVME_IOCTL_IO64_CMD_VEC: + return nvme_user_cmd64(ns->ctrl, ns, argp, true); default: return -ENOTTY; } @@ -480,7 +497,7 @@ long nvme_dev_ioctl(struct file *file, unsigned int cmd, case NVME_IOCTL_ADMIN_CMD: return nvme_user_cmd(ctrl, NULL, argp); case NVME_IOCTL_ADMIN64_CMD: - return nvme_user_cmd64(ctrl, NULL, argp); + 
return nvme_user_cmd64(ctrl, NULL, argp, false); case NVME_IOCTL_IO_CMD: return nvme_dev_user_cmd(ctrl, argp); case NVME_IOCTL_RESET: diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h index d99b5a7726980..b2e43185e3b55 100644 --- a/include/uapi/linux/nvme_ioctl.h +++ b/include/uapi/linux/nvme_ioctl.h @@ -55,7 +55,10 @@ struct nvme_passthru_cmd64 { __u64 metadata; __u64 addr; __u32 metadata_len; - __u32 data_len; + union { + __u32 data_len; /* for non-vectored io */ + __u32 vec_cnt; /* for vectored io */ + }; __u32 cdw10; __u32 cdw11; __u32 cdw12; @@ -78,5 +81,6 @@ struct nvme_passthru_cmd64 { #define NVME_IOCTL_RESCAN _IO('N', 0x46) #define NVME_IOCTL_ADMIN64_CMD _IOWR('N', 0x47, struct nvme_passthru_cmd64) #define NVME_IOCTL_IO64_CMD _IOWR('N', 0x48, struct nvme_passthru_cmd64) +#define NVME_IOCTL_IO64_CMD_VEC _IOWR('N', 0x49, struct nvme_passthru_cmd64) #endif /* _UAPI_LINUX_NVME_IOCTL_H */ -- GitLab From 20d64911e7580f7e29c0086d67860c18307377d7 Mon Sep 17 00:00:00 2001 From: Martin Belanger Date: Tue, 8 Feb 2022 14:33:45 -0500 Subject: [PATCH 0804/1586] nvme: send uevent on connection up When connectivity with a controller is lost, the driver will keep trying to reconnect once every 10 sec. When connection is restored, user-space apps need to be informed so that they can take proper action. For example, TP8010 introduces the DIM PDU, which is used to register with a discovery controller (DC). The DIM PDU is sent from user-space. The DIM PDU must be sent every time a connection is established with a DC. Therefore, the kernel must tell user-space apps when connection is restored so that registration can happen. The uevent sent is a "change" uevent with environmental data set to: "NVME_EVENT=connected". 
Signed-off-by: Martin Belanger Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: John Meneghini Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 5e55732f3ea1e..b3e70bb2e6dc5 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4262,6 +4262,13 @@ static int nvme_class_uevent(struct device *dev, struct kobj_uevent_env *env) return ret; } +static void nvme_change_uevent(struct nvme_ctrl *ctrl, char *envdata) +{ + char *envp[2] = { envdata, NULL }; + + kobject_uevent_env(&ctrl->device->kobj, KOBJ_CHANGE, envp); +} + static void nvme_aen_uevent(struct nvme_ctrl *ctrl) { char *envp[2] = { NULL, NULL }; @@ -4429,6 +4436,8 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl) nvme_queue_scan(ctrl); nvme_start_queues(ctrl); } + + nvme_change_uevent(ctrl, "NVME_EVENT=connected"); } EXPORT_SYMBOL_GPL(nvme_start_ctrl); -- GitLab From 86c2457a8e8112f16af8fd10a3e1dd7a302c3c3e Mon Sep 17 00:00:00 2001 From: Martin Belanger Date: Tue, 8 Feb 2022 14:33:46 -0500 Subject: [PATCH 0805/1586] nvme: expose cntrltype and dctype through sysfs TP8010 introduces the Discovery Controller Type attribute (dctype). The dctype is returned in the response to the Identify command. This patch exposes the dctype through the sysfs. Since the dctype depends on the Controller Type (cntrltype), another attribute of the Identify response, the patch also exposes the cntrltype as well. The dctype will only be displayed for discovery controllers. A note about the naming of this attribute: Although TP8010 calls this attribute the Discovery Controller Type, note that the dctype is now part of the response to the Identify command for all controller types. I/O, Discovery, and Admin controllers all share the same Identify response PDU structure. 
Non-discovery controllers as well as pre-TP8010 discovery controllers will continue to set this field to 0 (which has always been the default for reserved bytes). Per TP8010, the value 0 now means "Discovery controller type is not reported" instead of "Reserved". One could argue that this definition is correct even for non-discovery controllers, and by extension, exposing it in the sysfs for non-discovery controllers is appropriate. Signed-off-by: Martin Belanger Reviewed-by: Chaitanya Kulkarni Reviewed-by: John Meneghini Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 39 +++++++++++++++++++++++++++++++++++++++ drivers/nvme/host/nvme.h | 3 +++ include/linux/nvme.h | 10 +++++++++- 3 files changed, 51 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index b3e70bb2e6dc5..52a49ae5b6730 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3023,6 +3023,9 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->max_namespaces = le32_to_cpu(id->mnan); ctrl->ctratt = le32_to_cpu(id->ctratt); + ctrl->cntrltype = id->cntrltype; + ctrl->dctype = id->dctype; + if (id->rtd3e) { /* us -> s */ u32 transition_time = le32_to_cpu(id->rtd3e) / USEC_PER_SEC; @@ -3556,6 +3559,40 @@ static ssize_t nvme_ctrl_fast_io_fail_tmo_store(struct device *dev, static DEVICE_ATTR(fast_io_fail_tmo, S_IRUGO | S_IWUSR, nvme_ctrl_fast_io_fail_tmo_show, nvme_ctrl_fast_io_fail_tmo_store); +static ssize_t cntrltype_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + static const char * const type[] = { + [NVME_CTRL_IO] = "io\n", + [NVME_CTRL_DISC] = "discovery\n", + [NVME_CTRL_ADMIN] = "admin\n", + }; + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + if (ctrl->cntrltype > NVME_CTRL_ADMIN || !type[ctrl->cntrltype]) + return sysfs_emit(buf, "reserved\n"); + + return sysfs_emit(buf, type[ctrl->cntrltype]); +} +static DEVICE_ATTR_RO(cntrltype); + +static ssize_t 
dctype_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + static const char * const type[] = { + [NVME_DCTYPE_NOT_REPORTED] = "none\n", + [NVME_DCTYPE_DDC] = "ddc\n", + [NVME_DCTYPE_CDC] = "cdc\n", + }; + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + if (ctrl->dctype > NVME_DCTYPE_CDC || !type[ctrl->dctype]) + return sysfs_emit(buf, "reserved\n"); + + return sysfs_emit(buf, type[ctrl->dctype]); +} +static DEVICE_ATTR_RO(dctype); + static struct attribute *nvme_dev_attrs[] = { &dev_attr_reset_controller.attr, &dev_attr_rescan_controller.attr, @@ -3577,6 +3614,8 @@ static struct attribute *nvme_dev_attrs[] = { &dev_attr_reconnect_delay.attr, &dev_attr_fast_io_fail_tmo.attr, &dev_attr_kato.attr, + &dev_attr_cntrltype.attr, + &dev_attr_dctype.attr, NULL }; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 08893c04ca898..4ce48203e63f8 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -349,6 +349,9 @@ struct nvme_ctrl { unsigned long discard_page_busy; struct nvme_fault_inject fault_inject; + + enum nvme_ctrl_type cntrltype; + enum nvme_dctype dctype; }; enum nvme_iopolicy { diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 1f946e5bf7c12..9dbc3ef4daf7c 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -43,6 +43,12 @@ enum nvme_ctrl_type { NVME_CTRL_ADMIN = 3, /* Administrative controller */ }; +enum nvme_dctype { + NVME_DCTYPE_NOT_REPORTED = 0, + NVME_DCTYPE_DDC = 1, /* Direct Discovery Controller */ + NVME_DCTYPE_CDC = 2, /* Central Discovery Controller */ +}; + /* Address Family codes for Discovery Log Page entry ADRFAM field */ enum { NVMF_ADDR_FAMILY_PCI = 0, /* PCIe */ @@ -320,7 +326,9 @@ struct nvme_id_ctrl { __le16 icdoff; __u8 ctrattr; __u8 msdbd; - __u8 rsvd1804[244]; + __u8 rsvd1804[2]; + __u8 dctype; + __u8 rsvd1807[241]; struct nvme_id_power_state psd[32]; __u8 vs[1024]; }; -- GitLab From 1c3adf0de1db86cf354dcb1a2dd1184e5e63a50a Mon Sep 17 00:00:00 2001 From: 
Keith Busch Date: Tue, 15 Feb 2022 07:03:08 -0800 Subject: [PATCH 0806/1586] nvme: explicitly set non-error for directives Stream directives is an optional feature. It is not an error if a controller doesn't support as many as the kernel can optionally use. Explicitly set the non-error return value on this condition with a comment explaining why. Note, the return value was already 0 in this condition, so the setting is redundant. This patch should just silence bots that falsely believe the condition contains an error omission. Signed-off-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 52a49ae5b6730..1ce579d583c06 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -809,6 +809,8 @@ static int nvme_configure_directives(struct nvme_ctrl *ctrl) if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) { dev_info(ctrl->device, "too few streams (%u) available\n", ctrl->nssa); + /* this condition is not an error: streams are optional */ + ret = 0; goto out_disable_stream; } -- GitLab From 0a9f850061d9126b9a4aaf56ae1810138cac0f51 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 15 Feb 2022 07:37:19 -0800 Subject: [PATCH 0807/1586] nvme: remove nssa from struct nvme_ctrl The reported number of streams is not used outside the function that gets it, so no need to stash it in the controller structure. Use a local variable instead. 
Signed-off-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 9 +++++---- drivers/nvme/host/nvme.h | 1 - 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 1ce579d583c06..9cffc4770e737 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -790,6 +790,7 @@ static int nvme_get_stream_params(struct nvme_ctrl *ctrl, static int nvme_configure_directives(struct nvme_ctrl *ctrl) { struct streams_directive_params s; + u16 nssa; int ret; if (!(ctrl->oacs & NVME_CTRL_OACS_DIRECTIVES)) @@ -805,16 +806,16 @@ static int nvme_configure_directives(struct nvme_ctrl *ctrl) if (ret) goto out_disable_stream; - ctrl->nssa = le16_to_cpu(s.nssa); - if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) { + nssa = le16_to_cpu(s.nssa); + if (nssa < BLK_MAX_WRITE_HINTS - 1) { dev_info(ctrl->device, "too few streams (%u) available\n", - ctrl->nssa); + nssa); /* this condition is not an error: streams are optional */ ret = 0; goto out_disable_stream; } - ctrl->nr_streams = min_t(u16, ctrl->nssa, BLK_MAX_WRITE_HINTS - 1); + ctrl->nr_streams = min_t(u16, nssa, BLK_MAX_WRITE_HINTS - 1); dev_info(ctrl->device, "Using %u streams\n", ctrl->nr_streams); return 0; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 4ce48203e63f8..587d92df118b7 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -280,7 +280,6 @@ struct nvme_ctrl { u16 crdt[3]; u16 oncs; u16 oacs; - u16 nssa; u16 nr_streams; u16 sqsize; u32 max_namespaces; -- GitLab From fd8099e7918cd2df39ef306dd1d1af7178a15b81 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Feb 2022 10:57:15 +0100 Subject: [PATCH 0808/1586] nvme: cleanup __nvme_check_ids Pass the actual nvme_ns_ids used for the comparison instead of the ns_head that isn't needed and use a more descriptive function name. 
Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/core.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 9cffc4770e737..076a03b801b7e 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3673,16 +3673,15 @@ static struct nvme_ns_head *nvme_find_ns_head(struct nvme_subsystem *subsys, return NULL; } -static int __nvme_check_ids(struct nvme_subsystem *subsys, - struct nvme_ns_head *new) +static int nvme_subsys_check_duplicate_ids(struct nvme_subsystem *subsys, + struct nvme_ns_ids *ids) { struct nvme_ns_head *h; lockdep_assert_held(&subsys->lock); list_for_each_entry(h, &subsys->nsheads, entry) { - if (nvme_ns_ids_valid(&new->ids) && - nvme_ns_ids_equal(&new->ids, &h->ids)) + if (nvme_ns_ids_valid(ids) && nvme_ns_ids_equal(ids, &h->ids)) return -EINVAL; } @@ -3781,7 +3780,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, head->ids = *ids; kref_init(&head->ref); - ret = __nvme_check_ids(ctrl->subsys, head); + ret = nvme_subsys_check_duplicate_ids(ctrl->subsys, &head->ids); if (ret) { dev_err(ctrl->device, "duplicate IDs for nsid %d\n", nsid); -- GitLab From e2724cb9f0c406b8fb66efd3aa9e8b3edfd8d5c8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Feb 2022 11:32:58 +0100 Subject: [PATCH 0809/1586] nvme: fix the check for duplicate unique identifiers nvme_subsys_check_duplicate_ids needs to return an error if any of the identifiers matches, not just if all of them match. But it does not need to and should not look at the CSI value for this sanity check. Rewrite the logic to be separate from nvme_ns_ids_equal and optimize it by reducing duplicate checks for non-present identifiers.
Fixes: ed754e5deeb1 ("nvme: track shared namespaces") Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/core.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 076a03b801b7e..3e6ac985b24f6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1716,13 +1716,6 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) blk_queue_max_write_zeroes_sectors(queue, UINT_MAX); } -static bool nvme_ns_ids_valid(struct nvme_ns_ids *ids) -{ - return !uuid_is_null(&ids->uuid) || - memchr_inv(ids->nguid, 0, sizeof(ids->nguid)) || - memchr_inv(ids->eui64, 0, sizeof(ids->eui64)); -} - static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b) { return uuid_equal(&a->uuid, &b->uuid) && @@ -3676,12 +3669,21 @@ static struct nvme_ns_head *nvme_find_ns_head(struct nvme_subsystem *subsys, static int nvme_subsys_check_duplicate_ids(struct nvme_subsystem *subsys, struct nvme_ns_ids *ids) { + bool has_uuid = !uuid_is_null(&ids->uuid); + bool has_nguid = memchr_inv(ids->nguid, 0, sizeof(ids->nguid)); + bool has_eui64 = memchr_inv(ids->eui64, 0, sizeof(ids->eui64)); struct nvme_ns_head *h; lockdep_assert_held(&subsys->lock); list_for_each_entry(h, &subsys->nsheads, entry) { - if (nvme_ns_ids_valid(ids) && nvme_ns_ids_equal(ids, &h->ids)) + if (has_uuid && uuid_equal(&ids->uuid, &h->ids.uuid)) + return -EINVAL; + if (has_nguid && + memcmp(&ids->nguid, &h->ids.nguid, sizeof(ids->nguid)) == 0) + return -EINVAL; + if (has_eui64 && + memcmp(&ids->eui64, &h->ids.eui64, sizeof(ids->eui64)) == 0) return -EINVAL; } -- GitLab From e2d77d2e11c4f1e70a1a24cc8fe63ff3dc9b53ef Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Feb 2022 17:46:50 +0100 Subject: [PATCH 0810/1586] nvme: check for duplicate identifiers earlier Lift the check for duplicate identifiers into nvme_init_ns_head, 
which avoids pointless error unwinding in case they don't match, and also matches where we check identifier validity for the multipath case. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/core.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 3e6ac985b24f6..cece987ba1691 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3782,13 +3782,6 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, head->ids = *ids; kref_init(&head->ref); - ret = nvme_subsys_check_duplicate_ids(ctrl->subsys, &head->ids); - if (ret) { - dev_err(ctrl->device, - "duplicate IDs for nsid %d\n", nsid); - goto out_cleanup_srcu; - } - if (head->ids.csi) { ret = nvme_get_effects_log(ctrl, head->ids.csi, &head->effects); if (ret) @@ -3827,6 +3820,12 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, mutex_lock(&ctrl->subsys->lock); head = nvme_find_ns_head(ctrl->subsys, nsid); if (!head) { + ret = nvme_subsys_check_duplicate_ids(ctrl->subsys, ids); + if (ret) { + dev_err(ctrl->device, + "duplicate IDs for nsid %d\n", nsid); + goto out_unlock; + } head = nvme_alloc_ns_head(ctrl, nsid, ids); if (IS_ERR(head)) { ret = PTR_ERR(head); -- GitLab From 2079f41ec6ffaad9aa51ca550105b2228467aec7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Feb 2022 17:48:32 +0100 Subject: [PATCH 0811/1586] nvme: check that EUI/GUID/UUID are globally unique Add a check to verify that the unique identifiers are unique globally in addition to the existing check that verifies that they are unique inside a single subsystem. 
Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/core.c | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index cece987ba1691..f8084ded69e50 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3810,12 +3810,45 @@ out: return ERR_PTR(ret); } +static int nvme_global_check_duplicate_ids(struct nvme_subsystem *this, + struct nvme_ns_ids *ids) +{ + struct nvme_subsystem *s; + int ret = 0; + + /* + * Note that this check is racy as we try to avoid holding the global + * lock over the whole ns_head creation. But it is only intended as + * a sanity check anyway. + */ + mutex_lock(&nvme_subsystems_lock); + list_for_each_entry(s, &nvme_subsystems, entry) { + if (s == this) + continue; + mutex_lock(&s->lock); + ret = nvme_subsys_check_duplicate_ids(s, ids); + mutex_unlock(&s->lock); + if (ret) + break; + } + mutex_unlock(&nvme_subsystems_lock); + + return ret; +} + static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, struct nvme_ns_ids *ids, bool is_shared) { struct nvme_ctrl *ctrl = ns->ctrl; struct nvme_ns_head *head = NULL; - int ret = 0; + int ret; + + ret = nvme_global_check_duplicate_ids(ctrl->subsys, ids); + if (ret) { + dev_err(ctrl->device, + "globally duplicate IDs for nsid %d\n", nsid); + return ret; + } mutex_lock(&ctrl->subsys->lock); head = nvme_find_ns_head(ctrl->subsys, nsid); @@ -3823,7 +3856,8 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, ret = nvme_subsys_check_duplicate_ids(ctrl->subsys, ids); if (ret) { dev_err(ctrl->device, - "duplicate IDs for nsid %d\n", nsid); + "duplicate IDs in subsystem for nsid %d\n", + nsid); goto out_unlock; } head = nvme_alloc_ns_head(ctrl, nsid, ids); -- GitLab From 78e27f970f73a4ee57dc050a6233e09a56963391 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 25 Feb 2022 19:23:40 +0200 Subject: [PATCH 
0812/1586] spi: pxa2xx-pci: Refactor CE4100 to use ->setup() Refactor CE4100 handling code to use ->setup() instead of spreading potentially confusing conditional. Besides that, it will allow to refactor further to avoid intermediate storage for the used configuration parameters. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220225172350.69797-1-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx-pci.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c index 6d60972e4e207..bd20379d93424 100644 --- a/drivers/spi/spi-pxa2xx-pci.c +++ b/drivers/spi/spi-pxa2xx-pci.c @@ -30,7 +30,7 @@ enum { struct pxa_spi_info { enum pxa_ssp_type type; int port_id; - int num_chipselect; + unsigned int num_chipselect; unsigned long max_clk_rate; /* DMA channel request parameters */ @@ -114,6 +114,14 @@ static int lpss_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) return 0; } +static int ce4100_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) +{ + c->num_chipselect = dev->devfn; + c->max_clk_rate = 3686400; + + return 0; +} + static int mrfld_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) { struct dw_dma_slave *tx, *rx; @@ -163,8 +171,7 @@ static struct pxa_spi_info spi_info_configs[] = { [PORT_CE4100] = { .type = PXA25x_SSP, .port_id = -1, - .num_chipselect = -1, - .max_clk_rate = 3686400, + .setup = ce4100_spi_setup, }, [PORT_BYT] = { .type = LPSS_BYT_SSP, @@ -248,7 +255,7 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev, } memset(&spi_pdata, 0, sizeof(spi_pdata)); - spi_pdata.num_chipselect = (c->num_chipselect > 0) ? 
c->num_chipselect : dev->devfn; + spi_pdata.num_chipselect = c->num_chipselect; spi_pdata.dma_filter = c->dma_filter; spi_pdata.tx_param = c->tx_param; spi_pdata.rx_param = c->rx_param; -- GitLab From 71ea0e3ac70a50b0c56105e116ed903f8e504e8f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 25 Feb 2022 19:23:41 +0200 Subject: [PATCH 0813/1586] spi: pxa2xx-pci: Refactor Quark X1000 to use ->setup() Refactor Quark X1000 handling code to use ->setup() instead of using the configuration data structure directly. It will allow to refactor further to avoid intermediate storage for the used configuration parameters. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220225172350.69797-2-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx-pci.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c index bd20379d93424..4d617ad72bcae 100644 --- a/drivers/spi/spi-pxa2xx-pci.c +++ b/drivers/spi/spi-pxa2xx-pci.c @@ -167,6 +167,14 @@ static int mrfld_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) return 0; } +static int qrk_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) +{ + c->num_chipselect = 1; + c->max_clk_rate = 50000000; + + return 0; +} + static struct pxa_spi_info spi_info_configs[] = { [PORT_CE4100] = { .type = PXA25x_SSP, @@ -209,8 +217,7 @@ static struct pxa_spi_info spi_info_configs[] = { [PORT_QUARK_X1000] = { .type = QUARK_X1000_SSP, .port_id = -1, - .num_chipselect = 1, - .max_clk_rate = 50000000, + .setup = qrk_spi_setup, }, [PORT_LPT0] = { .type = LPSS_LPT_SSP, -- GitLab From 1d9d62959f1b52eb939df38b9fda8beea455c751 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 25 Feb 2022 19:23:42 +0200 Subject: [PATCH 0814/1586] spi: pxa2xx-pci: Drop redundant NULL check in ->probe() Since all platforms are using ->setup() function, drop unneeded check. 
Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220225172350.69797-3-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx-pci.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c index 4d617ad72bcae..90b95e49a164f 100644 --- a/drivers/spi/spi-pxa2xx-pci.c +++ b/drivers/spi/spi-pxa2xx-pci.c @@ -255,11 +255,9 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev, return ret; c = &spi_info_configs[ent->driver_data]; - if (c->setup) { - ret = c->setup(dev, c); - if (ret) - return ret; - } + ret = c->setup(dev, c); + if (ret) + return ret; memset(&spi_pdata, 0, sizeof(spi_pdata)); spi_pdata.num_chipselect = c->num_chipselect; -- GitLab From 108607ce4e39a51caca51aa97c44c31041a597d1 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 25 Feb 2022 19:23:43 +0200 Subject: [PATCH 0815/1586] spi: pxa2xx-pci: Move port_id assignment to ->setup() Instead of using conditional, move port_id to the corresponding ->setup() functions. 
Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220225172350.69797-4-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx-pci.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c index 90b95e49a164f..87629da3e544d 100644 --- a/drivers/spi/spi-pxa2xx-pci.c +++ b/drivers/spi/spi-pxa2xx-pci.c @@ -29,7 +29,7 @@ enum { struct pxa_spi_info { enum pxa_ssp_type type; - int port_id; + unsigned int port_id; unsigned int num_chipselect; unsigned long max_clk_rate; @@ -116,6 +116,7 @@ static int lpss_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) static int ce4100_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) { + c->port_id = dev->devfn; c->num_chipselect = dev->devfn; c->max_clk_rate = 3686400; @@ -169,6 +170,7 @@ static int mrfld_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) static int qrk_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) { + c->port_id = dev->devfn; c->num_chipselect = 1; c->max_clk_rate = 50000000; @@ -178,7 +180,6 @@ static int qrk_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) static struct pxa_spi_info spi_info_configs[] = { [PORT_CE4100] = { .type = PXA25x_SSP, - .port_id = -1, .setup = ce4100_spi_setup, }, [PORT_BYT] = { @@ -216,7 +217,6 @@ static struct pxa_spi_info spi_info_configs[] = { }, [PORT_QUARK_X1000] = { .type = QUARK_X1000_SSP, - .port_id = -1, .setup = qrk_spi_setup, }, [PORT_LPT0] = { @@ -271,8 +271,8 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev, ssp->dev = &dev->dev; ssp->phys_base = pci_resource_start(dev, 0); ssp->mmio_base = pcim_iomap_table(dev)[0]; - ssp->port_id = (c->port_id >= 0) ? 
c->port_id : dev->devfn; ssp->type = c->type; + ssp->port_id = c->port_id; pci_set_master(dev); -- GitLab From bd2e24de10da015147b02f8c2c4b8ebea8fa9574 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 25 Feb 2022 19:23:44 +0200 Subject: [PATCH 0816/1586] spi: pxa2xx-pci: Move dma_burst_size assignment to ->setup() Instead of using conditional, move dma_burst_size to the corresponding ->setup() function. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220225172350.69797-5-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx-pci.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c index 87629da3e544d..c2cbb002784ac 100644 --- a/drivers/spi/spi-pxa2xx-pci.c +++ b/drivers/spi/spi-pxa2xx-pci.c @@ -38,7 +38,7 @@ struct pxa_spi_info { void *tx_param; void *rx_param; - int dma_burst_size; + unsigned int dma_burst_size; int (*setup)(struct pci_dev *pdev, struct pxa_spi_info *c); }; @@ -111,6 +111,7 @@ static int lpss_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) } c->dma_filter = lpss_dma_filter; + c->dma_burst_size = 1; return 0; } @@ -265,7 +266,7 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev, spi_pdata.tx_param = c->tx_param; spi_pdata.rx_param = c->rx_param; spi_pdata.enable_dma = c->rx_param && c->tx_param; - spi_pdata.dma_burst_size = c->dma_burst_size ? c->dma_burst_size : 1; + spi_pdata.dma_burst_size = c->dma_burst_size; ssp = &spi_pdata.ssp; ssp->dev = &dev->dev; -- GitLab From 03f8e04e9f9be8d28c52ae801f37d49988f02ce4 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 25 Feb 2022 19:23:45 +0200 Subject: [PATCH 0817/1586] spi: pxa2xx-pci: Move max_clk_rate assignment to ->setup() Move max_clk_rate to the corresponding ->setup() function to unify with the rest. 
Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220225172350.69797-6-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx-pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c index c2cbb002784ac..5ac1487c9b3f5 100644 --- a/drivers/spi/spi-pxa2xx-pci.c +++ b/drivers/spi/spi-pxa2xx-pci.c @@ -153,6 +153,8 @@ static int mrfld_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) return -ENODEV; } + c->max_clk_rate = 25000000; + dma_dev = pci_get_slot(dev->bus, PCI_DEVFN(21, 0)); ret = devm_add_action_or_reset(&dev->dev, lpss_dma_put_device, dma_dev); if (ret) @@ -213,7 +215,6 @@ static struct pxa_spi_info spi_info_configs[] = { }, [PORT_MRFLD] = { .type = MRFLD_SSP, - .max_clk_rate = 25000000, .setup = mrfld_spi_setup, }, [PORT_QUARK_X1000] = { -- GitLab From 7e425c3c3d15241aa5b6c442a83f11b8bc4fee91 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 25 Feb 2022 19:23:46 +0200 Subject: [PATCH 0818/1586] spi: pxa2xx-pci: Replace enum with direct use of PCI IDs Instead of creating an abstraction on top of PCI IDs, just use them directly. The corresponding enum can be dropped. 
Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220225172350.69797-7-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx-pci.c | 161 ++++++++++++++++++----------------- 1 file changed, 83 insertions(+), 78 deletions(-) diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c index 5ac1487c9b3f5..a0f24e811e9fe 100644 --- a/drivers/spi/spi-pxa2xx-pci.c +++ b/drivers/spi/spi-pxa2xx-pci.c @@ -15,17 +15,17 @@ #include #include -enum { - PORT_QUARK_X1000, - PORT_BYT, - PORT_MRFLD, - PORT_BSW0, - PORT_BSW1, - PORT_BSW2, - PORT_CE4100, - PORT_LPT0, - PORT_LPT1, -}; +#define PCI_DEVICE_ID_INTEL_QUARK_X1000 0x0935 +#define PCI_DEVICE_ID_INTEL_BYT 0x0f0e +#define PCI_DEVICE_ID_INTEL_MRFLD 0x1194 +#define PCI_DEVICE_ID_INTEL_BSW0 0x228e +#define PCI_DEVICE_ID_INTEL_BSW1 0x2290 +#define PCI_DEVICE_ID_INTEL_BSW2 0x22ac +#define PCI_DEVICE_ID_INTEL_CE4100 0x2e6a +#define PCI_DEVICE_ID_INTEL_LPT0_0 0x9c65 +#define PCI_DEVICE_ID_INTEL_LPT0_1 0x9c66 +#define PCI_DEVICE_ID_INTEL_LPT1_0 0x9ce5 +#define PCI_DEVICE_ID_INTEL_LPT1_1 0x9ce6 struct pxa_spi_info { enum pxa_ssp_type type; @@ -86,6 +86,49 @@ static int lpss_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) struct pci_dev *dma_dev; int ret; + switch (dev->device) { + case PCI_DEVICE_ID_INTEL_BYT: + c->type = LPSS_BYT_SSP; + c->port_id = 0; + c->tx_param = &byt_tx_param; + c->rx_param = &byt_rx_param; + break; + case PCI_DEVICE_ID_INTEL_BSW0: + c->type = LPSS_BSW_SSP; + c->port_id = 0; + c->tx_param = &bsw0_tx_param; + c->rx_param = &bsw0_rx_param; + break; + case PCI_DEVICE_ID_INTEL_BSW1: + c->type = LPSS_BSW_SSP; + c->port_id = 1; + c->tx_param = &bsw1_tx_param; + c->rx_param = &bsw1_rx_param; + break; + case PCI_DEVICE_ID_INTEL_BSW2: + c->type = LPSS_BSW_SSP; + c->port_id = 2; + c->tx_param = &bsw2_tx_param; + c->rx_param = &bsw2_rx_param; + break; + case PCI_DEVICE_ID_INTEL_LPT0_0: + case PCI_DEVICE_ID_INTEL_LPT1_0: + c->type = LPSS_LPT_SSP; + 
c->port_id = 0; + c->tx_param = &lpt0_tx_param; + c->rx_param = &lpt0_rx_param; + break; + case PCI_DEVICE_ID_INTEL_LPT0_1: + case PCI_DEVICE_ID_INTEL_LPT1_1: + c->type = LPSS_LPT_SSP; + c->port_id = 1; + c->tx_param = &lpt1_tx_param; + c->rx_param = &lpt1_rx_param; + break; + default: + return -ENODEV; + } + c->num_chipselect = 1; c->max_clk_rate = 50000000; @@ -115,8 +158,13 @@ static int lpss_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) return 0; } +static struct pxa_spi_info lpss_info_config = { + .setup = lpss_spi_setup, +}; + static int ce4100_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) { + c->type = PXA25x_SSP; c->port_id = dev->devfn; c->num_chipselect = dev->devfn; c->max_clk_rate = 3686400; @@ -124,6 +172,10 @@ static int ce4100_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) return 0; } +static struct pxa_spi_info ce4100_info_config = { + .setup = ce4100_spi_setup, +}; + static int mrfld_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) { struct dw_dma_slave *tx, *rx; @@ -153,6 +205,7 @@ static int mrfld_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) return -ENODEV; } + c->type = MRFLD_SSP; c->max_clk_rate = 25000000; dma_dev = pci_get_slot(dev->bus, PCI_DEVFN(21, 0)); @@ -171,8 +224,13 @@ static int mrfld_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) return 0; } +static struct pxa_spi_info mrfld_info_config = { + .setup = mrfld_spi_setup, +}; + static int qrk_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) { + c->type = QUARK_X1000_SSP; c->port_id = dev->devfn; c->num_chipselect = 1; c->max_clk_rate = 50000000; @@ -180,61 +238,8 @@ static int qrk_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) return 0; } -static struct pxa_spi_info spi_info_configs[] = { - [PORT_CE4100] = { - .type = PXA25x_SSP, - .setup = ce4100_spi_setup, - }, - [PORT_BYT] = { - .type = LPSS_BYT_SSP, - .port_id = 0, - .setup = lpss_spi_setup, - .tx_param = &byt_tx_param, - .rx_param = &byt_rx_param, - }, - [PORT_BSW0] = 
{ - .type = LPSS_BSW_SSP, - .port_id = 0, - .setup = lpss_spi_setup, - .tx_param = &bsw0_tx_param, - .rx_param = &bsw0_rx_param, - }, - [PORT_BSW1] = { - .type = LPSS_BSW_SSP, - .port_id = 1, - .setup = lpss_spi_setup, - .tx_param = &bsw1_tx_param, - .rx_param = &bsw1_rx_param, - }, - [PORT_BSW2] = { - .type = LPSS_BSW_SSP, - .port_id = 2, - .setup = lpss_spi_setup, - .tx_param = &bsw2_tx_param, - .rx_param = &bsw2_rx_param, - }, - [PORT_MRFLD] = { - .type = MRFLD_SSP, - .setup = mrfld_spi_setup, - }, - [PORT_QUARK_X1000] = { - .type = QUARK_X1000_SSP, - .setup = qrk_spi_setup, - }, - [PORT_LPT0] = { - .type = LPSS_LPT_SSP, - .port_id = 0, - .setup = lpss_spi_setup, - .tx_param = &lpt0_tx_param, - .rx_param = &lpt0_rx_param, - }, - [PORT_LPT1] = { - .type = LPSS_LPT_SSP, - .port_id = 1, - .setup = lpss_spi_setup, - .tx_param = &lpt1_tx_param, - .rx_param = &lpt1_rx_param, - }, +static struct pxa_spi_info qrk_info_config = { + .setup = qrk_spi_setup, }; static int pxa2xx_spi_pci_probe(struct pci_dev *dev, @@ -256,7 +261,7 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev, if (ret) return ret; - c = &spi_info_configs[ent->driver_data]; + c = (struct pxa_spi_info *)ent->driver_data; ret = c->setup(dev, c); if (ret) return ret; @@ -320,17 +325,17 @@ static void pxa2xx_spi_pci_remove(struct pci_dev *dev) } static const struct pci_device_id pxa2xx_spi_pci_devices[] = { - { PCI_VDEVICE(INTEL, 0x0935), PORT_QUARK_X1000 }, - { PCI_VDEVICE(INTEL, 0x0f0e), PORT_BYT }, - { PCI_VDEVICE(INTEL, 0x1194), PORT_MRFLD }, - { PCI_VDEVICE(INTEL, 0x228e), PORT_BSW0 }, - { PCI_VDEVICE(INTEL, 0x2290), PORT_BSW1 }, - { PCI_VDEVICE(INTEL, 0x22ac), PORT_BSW2 }, - { PCI_VDEVICE(INTEL, 0x2e6a), PORT_CE4100 }, - { PCI_VDEVICE(INTEL, 0x9c65), PORT_LPT0 }, - { PCI_VDEVICE(INTEL, 0x9c66), PORT_LPT1 }, - { PCI_VDEVICE(INTEL, 0x9ce5), PORT_LPT0 }, - { PCI_VDEVICE(INTEL, 0x9ce6), PORT_LPT1 }, + { PCI_DEVICE_DATA(INTEL, QUARK_X1000, &qrk_info_config) }, + { PCI_DEVICE_DATA(INTEL, BYT, 
&lpss_info_config) }, + { PCI_DEVICE_DATA(INTEL, MRFLD, &mrfld_info_config) }, + { PCI_DEVICE_DATA(INTEL, BSW0, &lpss_info_config) }, + { PCI_DEVICE_DATA(INTEL, BSW1, &lpss_info_config) }, + { PCI_DEVICE_DATA(INTEL, BSW2, &lpss_info_config) }, + { PCI_DEVICE_DATA(INTEL, CE4100, &ce4100_info_config) }, + { PCI_DEVICE_DATA(INTEL, LPT0_0, &lpss_info_config) }, + { PCI_DEVICE_DATA(INTEL, LPT0_1, &lpss_info_config) }, + { PCI_DEVICE_DATA(INTEL, LPT1_0, &lpss_info_config) }, + { PCI_DEVICE_DATA(INTEL, LPT1_1, &lpss_info_config) }, { } }; MODULE_DEVICE_TABLE(pci, pxa2xx_spi_pci_devices); -- GitLab From cb50f3f32a044ea45192a43e756b26048d35ba95 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 25 Feb 2022 19:23:47 +0200 Subject: [PATCH 0819/1586] spi: pxa2xx-pci: Drop unneeded checks in lpss_spi_setup() All of the LPSS devices are using DMA and set the parameters up, hence no need to test for that. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220225172350.69797-8-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx-pci.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c index a0f24e811e9fe..c041a9288d0c0 100644 --- a/drivers/spi/spi-pxa2xx-pci.c +++ b/drivers/spi/spi-pxa2xx-pci.c @@ -83,6 +83,7 @@ static void lpss_dma_put_device(void *dma_dev) static int lpss_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) { + struct dw_dma_slave *tx, *rx; struct pci_dev *dma_dev; int ret; @@ -137,21 +138,15 @@ static int lpss_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) if (ret) return ret; - if (c->tx_param) { - struct dw_dma_slave *slave = c->tx_param; - - slave->dma_dev = &dma_dev->dev; - slave->m_master = 0; - slave->p_master = 1; - } - - if (c->rx_param) { - struct dw_dma_slave *slave = c->rx_param; + tx = c->tx_param; + tx->dma_dev = &dma_dev->dev; + tx->m_master = 0; + tx->p_master = 1; - slave->dma_dev = 
&dma_dev->dev; - slave->m_master = 0; - slave->p_master = 1; - } + rx = c->rx_param; + rx->dma_dev = &dma_dev->dev; + rx->m_master = 0; + rx->p_master = 1; c->dma_filter = lpss_dma_filter; c->dma_burst_size = 1; -- GitLab From c3f4fc096b37bc2e4535f16ac3d65d517bbc14eb Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 25 Feb 2022 19:23:48 +0200 Subject: [PATCH 0820/1586] spi: pxa2xx-pci: Extract pxa2xx_spi_pci_clk_register() Extract pxa2xx_spi_pci_clk_register() from ->probe() in order to reuse it later on for getting rid of max_clk_rate temporary storage. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220225172350.69797-9-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx-pci.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c index c041a9288d0c0..2dbe08034ad03 100644 --- a/drivers/spi/spi-pxa2xx-pci.c +++ b/drivers/spi/spi-pxa2xx-pci.c @@ -65,6 +65,24 @@ static struct dw_dma_slave lpt1_rx_param = { .src_id = 1 }; static struct dw_dma_slave lpt0_tx_param = { .dst_id = 2 }; static struct dw_dma_slave lpt0_rx_param = { .src_id = 3 }; +static void pxa2xx_spi_pci_clk_unregister(void *clk) +{ + clk_unregister(clk); +} + +static int pxa2xx_spi_pci_clk_register(struct pci_dev *dev, struct ssp_device *ssp, + unsigned long rate) +{ + char buf[40]; + + snprintf(buf, sizeof(buf), "pxa2xx-spi.%d", ssp->port_id); + ssp->clk = clk_register_fixed_rate(&dev->dev, buf, NULL, 0, rate); + if (IS_ERR(ssp->clk)) + return PTR_ERR(ssp->clk); + + return devm_add_action_or_reset(&dev->dev, pxa2xx_spi_pci_clk_unregister, ssp->clk); +} + static bool lpss_dma_filter(struct dma_chan *chan, void *param) { struct dw_dma_slave *dws = param; @@ -246,7 +264,6 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev, struct pxa2xx_spi_controller spi_pdata; struct ssp_device *ssp; struct pxa_spi_info *c; - char buf[40]; ret = 
pcim_enable_device(dev); if (ret) @@ -283,11 +300,9 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev, return ret; ssp->irq = pci_irq_vector(dev, 0); - snprintf(buf, sizeof(buf), "pxa2xx-spi.%d", ssp->port_id); - ssp->clk = clk_register_fixed_rate(&dev->dev, buf, NULL, 0, - c->max_clk_rate); - if (IS_ERR(ssp->clk)) - return PTR_ERR(ssp->clk); + ret = pxa2xx_spi_pci_clk_register(dev, ssp, c->max_clk_rate); + if (ret) + return ret; memset(&pi, 0, sizeof(pi)); pi.fwnode = dev_fwnode(&dev->dev); @@ -298,10 +313,8 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev, pi.size_data = sizeof(spi_pdata); pdev = platform_device_register_full(&pi); - if (IS_ERR(pdev)) { - clk_unregister(ssp->clk); + if (IS_ERR(pdev)) return PTR_ERR(pdev); - } pci_set_drvdata(dev, pdev); @@ -311,12 +324,8 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev, static void pxa2xx_spi_pci_remove(struct pci_dev *dev) { struct platform_device *pdev = pci_get_drvdata(dev); - struct pxa2xx_spi_controller *spi_pdata; - - spi_pdata = dev_get_platdata(&pdev->dev); platform_device_unregister(pdev); - clk_unregister(spi_pdata->ssp.clk); } static const struct pci_device_id pxa2xx_spi_pci_devices[] = { -- GitLab From ba8d1353d9c2d9190a523860e37bd7cb7b9de31b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 25 Feb 2022 19:23:49 +0200 Subject: [PATCH 0821/1586] spi: pxa2xx-pci: Drop temporary storage use for a handful of members Instead of using temporary storage, assign the values directly to the corresponding struct pxa2xx_spi_controller members. 
Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220225172350.69797-10-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx-pci.c | 110 ++++++++++++++++------------------- 1 file changed, 49 insertions(+), 61 deletions(-) diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c index 2dbe08034ad03..3c5d14affa956 100644 --- a/drivers/spi/spi-pxa2xx-pci.c +++ b/drivers/spi/spi-pxa2xx-pci.c @@ -28,19 +28,7 @@ #define PCI_DEVICE_ID_INTEL_LPT1_1 0x9ce6 struct pxa_spi_info { - enum pxa_ssp_type type; - unsigned int port_id; - unsigned int num_chipselect; - unsigned long max_clk_rate; - - /* DMA channel request parameters */ - bool (*dma_filter)(struct dma_chan *chan, void *param); - void *tx_param; - void *rx_param; - - unsigned int dma_burst_size; - - int (*setup)(struct pci_dev *pdev, struct pxa_spi_info *c); + int (*setup)(struct pci_dev *pdev, struct pxa2xx_spi_controller *c); }; static struct dw_dma_slave byt_tx_param = { .dst_id = 0 }; @@ -99,48 +87,49 @@ static void lpss_dma_put_device(void *dma_dev) pci_dev_put(dma_dev); } -static int lpss_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) +static int lpss_spi_setup(struct pci_dev *dev, struct pxa2xx_spi_controller *c) { + struct ssp_device *ssp = &c->ssp; struct dw_dma_slave *tx, *rx; struct pci_dev *dma_dev; int ret; switch (dev->device) { case PCI_DEVICE_ID_INTEL_BYT: - c->type = LPSS_BYT_SSP; - c->port_id = 0; + ssp->type = LPSS_BYT_SSP; + ssp->port_id = 0; c->tx_param = &byt_tx_param; c->rx_param = &byt_rx_param; break; case PCI_DEVICE_ID_INTEL_BSW0: - c->type = LPSS_BSW_SSP; - c->port_id = 0; + ssp->type = LPSS_BSW_SSP; + ssp->port_id = 0; c->tx_param = &bsw0_tx_param; c->rx_param = &bsw0_rx_param; break; case PCI_DEVICE_ID_INTEL_BSW1: - c->type = LPSS_BSW_SSP; - c->port_id = 1; + ssp->type = LPSS_BSW_SSP; + ssp->port_id = 1; c->tx_param = &bsw1_tx_param; c->rx_param = &bsw1_rx_param; break; case PCI_DEVICE_ID_INTEL_BSW2: - c->type = 
LPSS_BSW_SSP; - c->port_id = 2; + ssp->type = LPSS_BSW_SSP; + ssp->port_id = 2; c->tx_param = &bsw2_tx_param; c->rx_param = &bsw2_rx_param; break; case PCI_DEVICE_ID_INTEL_LPT0_0: case PCI_DEVICE_ID_INTEL_LPT1_0: - c->type = LPSS_LPT_SSP; - c->port_id = 0; + ssp->type = LPSS_LPT_SSP; + ssp->port_id = 0; c->tx_param = &lpt0_tx_param; c->rx_param = &lpt0_rx_param; break; case PCI_DEVICE_ID_INTEL_LPT0_1: case PCI_DEVICE_ID_INTEL_LPT1_1: - c->type = LPSS_LPT_SSP; - c->port_id = 1; + ssp->type = LPSS_LPT_SSP; + ssp->port_id = 1; c->tx_param = &lpt1_tx_param; c->rx_param = &lpt1_rx_param; break; @@ -149,7 +138,10 @@ static int lpss_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) } c->num_chipselect = 1; - c->max_clk_rate = 50000000; + + ret = pxa2xx_spi_pci_clk_register(dev, ssp, 50000000); + if (ret) + return ret; dma_dev = pci_get_slot(dev->bus, PCI_DEVFN(PCI_SLOT(dev->devfn), 0)); ret = devm_add_action_or_reset(&dev->dev, lpss_dma_put_device, dma_dev); @@ -168,6 +160,7 @@ static int lpss_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) c->dma_filter = lpss_dma_filter; c->dma_burst_size = 1; + c->enable_dma = 1; return 0; } @@ -175,41 +168,45 @@ static struct pxa_spi_info lpss_info_config = { .setup = lpss_spi_setup, }; -static int ce4100_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) +static int ce4100_spi_setup(struct pci_dev *dev, struct pxa2xx_spi_controller *c) { - c->type = PXA25x_SSP; - c->port_id = dev->devfn; + struct ssp_device *ssp = &c->ssp; + + ssp->type = PXA25x_SSP; + ssp->port_id = dev->devfn; c->num_chipselect = dev->devfn; - c->max_clk_rate = 3686400; - return 0; + return pxa2xx_spi_pci_clk_register(dev, ssp, 3686400); } static struct pxa_spi_info ce4100_info_config = { .setup = ce4100_spi_setup, }; -static int mrfld_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) +static int mrfld_spi_setup(struct pci_dev *dev, struct pxa2xx_spi_controller *c) { + struct ssp_device *ssp = &c->ssp; struct dw_dma_slave *tx, *rx; struct 
pci_dev *dma_dev; int ret; + ssp->type = MRFLD_SSP; + switch (PCI_FUNC(dev->devfn)) { case 0: - c->port_id = 3; + ssp->port_id = 3; c->num_chipselect = 1; c->tx_param = &mrfld3_tx_param; c->rx_param = &mrfld3_rx_param; break; case 1: - c->port_id = 5; + ssp->port_id = 5; c->num_chipselect = 4; c->tx_param = &mrfld5_tx_param; c->rx_param = &mrfld5_rx_param; break; case 2: - c->port_id = 6; + ssp->port_id = 6; c->num_chipselect = 1; c->tx_param = &mrfld6_tx_param; c->rx_param = &mrfld6_rx_param; @@ -218,8 +215,9 @@ static int mrfld_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) return -ENODEV; } - c->type = MRFLD_SSP; - c->max_clk_rate = 25000000; + ret = pxa2xx_spi_pci_clk_register(dev, ssp, 25000000); + if (ret) + return ret; dma_dev = pci_get_slot(dev->bus, PCI_DEVFN(21, 0)); ret = devm_add_action_or_reset(&dev->dev, lpss_dma_put_device, dma_dev); @@ -234,6 +232,7 @@ static int mrfld_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) c->dma_filter = lpss_dma_filter; c->dma_burst_size = 8; + c->enable_dma = 1; return 0; } @@ -241,14 +240,15 @@ static struct pxa_spi_info mrfld_info_config = { .setup = mrfld_spi_setup, }; -static int qrk_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) +static int qrk_spi_setup(struct pci_dev *dev, struct pxa2xx_spi_controller *c) { - c->type = QUARK_X1000_SSP; - c->port_id = dev->devfn; + struct ssp_device *ssp = &c->ssp; + + ssp->type = QUARK_X1000_SSP; + ssp->port_id = dev->devfn; c->num_chipselect = 1; - c->max_clk_rate = 50000000; - return 0; + return pxa2xx_spi_pci_clk_register(dev, ssp, 50000000); } static struct pxa_spi_info qrk_info_config = { @@ -262,8 +262,8 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev, int ret; struct platform_device *pdev; struct pxa2xx_spi_controller spi_pdata; + struct pxa_spi_info *info; struct ssp_device *ssp; - struct pxa_spi_info *c; ret = pcim_enable_device(dev); if (ret) @@ -273,25 +273,17 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev, if (ret) return ret; - c 
= (struct pxa_spi_info *)ent->driver_data; - ret = c->setup(dev, c); - if (ret) - return ret; - memset(&spi_pdata, 0, sizeof(spi_pdata)); - spi_pdata.num_chipselect = c->num_chipselect; - spi_pdata.dma_filter = c->dma_filter; - spi_pdata.tx_param = c->tx_param; - spi_pdata.rx_param = c->rx_param; - spi_pdata.enable_dma = c->rx_param && c->tx_param; - spi_pdata.dma_burst_size = c->dma_burst_size; ssp = &spi_pdata.ssp; ssp->dev = &dev->dev; ssp->phys_base = pci_resource_start(dev, 0); ssp->mmio_base = pcim_iomap_table(dev)[0]; - ssp->type = c->type; - ssp->port_id = c->port_id; + + info = (struct pxa_spi_info *)ent->driver_data; + ret = info->setup(dev, &spi_pdata); + if (ret) + return ret; pci_set_master(dev); @@ -300,10 +292,6 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev, return ret; ssp->irq = pci_irq_vector(dev, 0); - ret = pxa2xx_spi_pci_clk_register(dev, ssp, c->max_clk_rate); - if (ret) - return ret; - memset(&pi, 0, sizeof(pi)); pi.fwnode = dev_fwnode(&dev->dev); pi.parent = &dev->dev; -- GitLab From fcaaf76ed5f3bbf346db9e49d9d9c0978d8f8dce Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 25 Feb 2022 19:23:50 +0200 Subject: [PATCH 0822/1586] spi: pxa2xx-pci: Constify struct pxa_spi_info variables Now when there are no dynamical changes required, we may constify struct pxa_spi_info variables. 
Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220225172350.69797-11-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx-pci.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c index 3c5d14affa956..861b21c635043 100644 --- a/drivers/spi/spi-pxa2xx-pci.c +++ b/drivers/spi/spi-pxa2xx-pci.c @@ -164,7 +164,7 @@ static int lpss_spi_setup(struct pci_dev *dev, struct pxa2xx_spi_controller *c) return 0; } -static struct pxa_spi_info lpss_info_config = { +static const struct pxa_spi_info lpss_info_config = { .setup = lpss_spi_setup, }; @@ -179,7 +179,7 @@ static int ce4100_spi_setup(struct pci_dev *dev, struct pxa2xx_spi_controller *c return pxa2xx_spi_pci_clk_register(dev, ssp, 3686400); } -static struct pxa_spi_info ce4100_info_config = { +static const struct pxa_spi_info ce4100_info_config = { .setup = ce4100_spi_setup, }; @@ -236,7 +236,7 @@ static int mrfld_spi_setup(struct pci_dev *dev, struct pxa2xx_spi_controller *c) return 0; } -static struct pxa_spi_info mrfld_info_config = { +static const struct pxa_spi_info mrfld_info_config = { .setup = mrfld_spi_setup, }; @@ -251,18 +251,18 @@ static int qrk_spi_setup(struct pci_dev *dev, struct pxa2xx_spi_controller *c) return pxa2xx_spi_pci_clk_register(dev, ssp, 50000000); } -static struct pxa_spi_info qrk_info_config = { +static const struct pxa_spi_info qrk_info_config = { .setup = qrk_spi_setup, }; static int pxa2xx_spi_pci_probe(struct pci_dev *dev, const struct pci_device_id *ent) { + const struct pxa_spi_info *info; struct platform_device_info pi; int ret; struct platform_device *pdev; struct pxa2xx_spi_controller spi_pdata; - struct pxa_spi_info *info; struct ssp_device *ssp; ret = pcim_enable_device(dev); -- GitLab From 6bb477df04366e0f69dd2f49e1ae1099069326bc Mon Sep 17 00:00:00 2001 From: Yun Zhou Date: Thu, 17 Feb 2022 22:12:34 +0800 Subject: [PATCH 0823/1586] spi: use 
specific last_cs instead of last_cs_enable Commit d40f0b6f2e21 introduced last_cs_enable to avoid setting chipselect if it's not necessary, but it also introduces a bug. The chipselect may not be set correctly on multi-device SPI busses. The reason is that we can't judge the chipselect by bool last_cs_enable, since chipselect may be modified after other devices were accessed. So we should record the specific state of chipselect in case of confusion. Signed-off-by: Yun Zhou Link: https://lore.kernel.org/r/20220217141234.72737-1-yun.zhou@windriver.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 8 ++++++-- include/linux/spi/spi.h | 5 +++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index cd4dc3131e17f..6326e592fcfdb 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -926,13 +926,14 @@ static void spi_set_cs(struct spi_device *spi, bool enable, bool force) * Avoid calling into the driver (or doing delays) if the chip select * isn't actually changing from the last time this was called. */ - if (!force && (spi->controller->last_cs_enable == enable) && + if (!force && ((enable && spi->controller->last_cs == spi->chip_select) || + (!enable && spi->controller->last_cs != spi->chip_select)) && (spi->controller->last_cs_mode_high == (spi->mode & SPI_CS_HIGH))) return; trace_spi_set_cs(spi, activate); - spi->controller->last_cs_enable = enable; + spi->controller->last_cs = enable ? 
spi->chip_select : -1; spi->controller->last_cs_mode_high = spi->mode & SPI_CS_HIGH; if ((spi->cs_gpiod || !spi->controller->set_cs_timing) && !activate) { @@ -3016,6 +3017,9 @@ int spi_register_controller(struct spi_controller *ctlr) goto free_bus_id; } + /* setting last_cs to -1 means no chip selected */ + ctlr->last_cs = -1; + status = device_add(&ctlr->dev); if (status < 0) goto free_bus_id; diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 579d71cdf6fab..7d005fa4631c3 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -370,7 +370,8 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * @cur_msg_prepared: spi_prepare_message was called for the currently * in-flight message * @cur_msg_mapped: message has been mapped for DMA - * @last_cs_enable: was enable true on the last call to set_cs. + * @last_cs: the last chip_select that is recorded by set_cs, -1 on non chip + * selected * @last_cs_mode_high: was (mode & SPI_CS_HIGH) true on the last call to set_cs. * @xfer_completion: used by core transfer_one_message() * @busy: message pump is busy @@ -603,7 +604,7 @@ struct spi_controller { bool auto_runtime_pm; bool cur_msg_prepared; bool cur_msg_mapped; - bool last_cs_enable; + char last_cs; bool last_cs_mode_high; bool fallback; struct completion xfer_completion; -- GitLab From f5ec592dd3bcf7c91f7c262a7f5011e001d269cd Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 24 Jan 2022 13:59:36 -0800 Subject: [PATCH 0824/1586] block: simplify calling convention of elv_unregister_queue() Make elv_unregister_queue() a no-op if q->elevator is NULL or is not registered. This simplifies the existing callers, as well as the future caller in the error path of blk_register_queue(). Also don't bother checking whether q is NULL, since it never is. 
Reviewed-by: Hannes Reinecke Reviewed-by: Bart Van Assche Signed-off-by: Eric Biggers Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220124215938.2769-2-ebiggers@kernel.org Signed-off-by: Jens Axboe --- block/blk-sysfs.c | 3 +-- block/elevator.c | 8 ++++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 4c6b7dff71e5b..49854c7ea29b0 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -960,8 +960,7 @@ void blk_unregister_queue(struct gendisk *disk) blk_trace_remove_sysfs(disk_to_dev(disk)); mutex_lock(&q->sysfs_lock); - if (q->elevator) - elv_unregister_queue(q); + elv_unregister_queue(q); disk_unregister_independent_access_ranges(disk); mutex_unlock(&q->sysfs_lock); mutex_unlock(&q->sysfs_dir_lock); diff --git a/block/elevator.c b/block/elevator.c index 6847ab6e7aa50..a842e4b8ebc66 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -516,9 +516,11 @@ int elv_register_queue(struct request_queue *q, bool uevent) void elv_unregister_queue(struct request_queue *q) { + struct elevator_queue *e = q->elevator; + lockdep_assert_held(&q->sysfs_lock); - if (q) { + if (e && e->registered) { struct elevator_queue *e = q->elevator; kobject_uevent(&e->kobj, KOBJ_REMOVE); @@ -593,9 +595,7 @@ int elevator_switch_mq(struct request_queue *q, lockdep_assert_held(&q->sysfs_lock); if (q->elevator) { - if (q->elevator->registered) - elv_unregister_queue(q); - + elv_unregister_queue(q); ioc_clear_queue(q); blk_mq_sched_free_rqs(q); elevator_exit(q); -- GitLab From 0f69288253e9fc7c495047720e523b9f1aba5712 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 24 Jan 2022 13:59:37 -0800 Subject: [PATCH 0825/1586] block: don't delete queue kobject before its children kobjects aren't supposed to be deleted before their child kobjects are deleted. 
Apparently this is usually benign; however, a WARN will be triggered if one of the child kobjects has a named attribute group: sysfs group 'modes' not found for kobject 'crypto' WARNING: CPU: 0 PID: 1 at fs/sysfs/group.c:278 sysfs_remove_group+0x72/0x80 ... Call Trace: sysfs_remove_groups+0x29/0x40 fs/sysfs/group.c:312 __kobject_del+0x20/0x80 lib/kobject.c:611 kobject_cleanup+0xa4/0x140 lib/kobject.c:696 kobject_release lib/kobject.c:736 [inline] kref_put include/linux/kref.h:65 [inline] kobject_put+0x53/0x70 lib/kobject.c:753 blk_crypto_sysfs_unregister+0x10/0x20 block/blk-crypto-sysfs.c:159 blk_unregister_queue+0xb0/0x110 block/blk-sysfs.c:962 del_gendisk+0x117/0x250 block/genhd.c:610 Fix this by moving the kobject_del() and the corresponding kobject_uevent() to the correct place. Fixes: 2c2086afc2b8 ("block: Protect less code with sysfs_lock in blk_{un,}register_queue()") Reviewed-by: Hannes Reinecke Reviewed-by: Greg Kroah-Hartman Reviewed-by: Bart Van Assche Signed-off-by: Eric Biggers Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220124215938.2769-3-ebiggers@kernel.org Signed-off-by: Jens Axboe --- block/blk-sysfs.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 49854c7ea29b0..b38ef1bbf389c 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -954,15 +954,17 @@ void blk_unregister_queue(struct gendisk *disk) */ if (queue_is_mq(q)) blk_mq_unregister_dev(disk_to_dev(disk), q); - - kobject_uevent(&q->kobj, KOBJ_REMOVE); - kobject_del(&q->kobj); blk_trace_remove_sysfs(disk_to_dev(disk)); mutex_lock(&q->sysfs_lock); elv_unregister_queue(q); disk_unregister_independent_access_ranges(disk); mutex_unlock(&q->sysfs_lock); + + /* Now that we've deleted all child objects, we can delete the queue. 
*/ + kobject_uevent(&q->kobj, KOBJ_REMOVE); + kobject_del(&q->kobj); + mutex_unlock(&q->sysfs_dir_lock); kobject_put(&disk_to_dev(disk)->kobj); -- GitLab From 20f01f163203666010ee1560852590a0c0572726 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 24 Jan 2022 13:59:38 -0800 Subject: [PATCH 0826/1586] blk-crypto: show crypto capabilities in sysfs Add sysfs files that expose the inline encryption capabilities of request queues: /sys/block/$disk/queue/crypto/max_dun_bits /sys/block/$disk/queue/crypto/modes/$mode /sys/block/$disk/queue/crypto/num_keyslots Userspace can use these new files to decide what encryption settings to use, or whether to use inline encryption at all. This also brings the crypto capabilities in line with the other queue properties, which are already discoverable via the queue directory in sysfs. Design notes: - Place the new files in a new subdirectory "crypto" to group them together and to avoid complicating the main "queue" directory. This also makes it possible to replace "crypto" with a symlink later if we ever make the blk_crypto_profiles into real kobjects (see below). - It was necessary to define a new kobject that corresponds to the crypto subdirectory. For now, this kobject just contains a pointer to the blk_crypto_profile. Note that multiple queues (and hence multiple such kobjects) may refer to the same blk_crypto_profile. An alternative design would more closely match the current kernel data structures: the blk_crypto_profile could be a kobject itself, located directly under the host controller device's kobject, while /sys/block/$disk/queue/crypto would be a symlink to it. I decided not to do that for now because it would require a lot more changes, such as no longer embedding blk_crypto_profile in other structures, and also because I'm not sure we can rule out moving the crypto capabilities into 'struct queue_limits' in the future. 
(Even if multiple queues share the same crypto engine, maybe the supported data unit sizes could differ due to other queue properties.) It would also still be possible to switch to that design later without breaking userspace, by replacing the directory with a symlink. - Use "max_dun_bits" instead of "max_dun_bytes". Currently, the kernel internally stores this value in bytes, but that's an implementation detail. It probably makes more sense to talk about this value in bits, and choosing bits is more future-proof. - "modes" is a sub-subdirectory, since there may be multiple supported crypto modes, sysfs is supposed to have one value per file, and it makes sense to group all the mode files together. - Each mode had to be named. The crypto API names like "xts(aes)" are not appropriate because they don't specify the key size. Therefore, I assigned new names. The exact names chosen are arbitrary, but they happen to match the names used in log messages in fs/crypto/. - The "num_keyslots" file is a bit different from the others in that it is only useful to know for performance reasons. However, it's included as it can still be useful. For example, a user might not want to use inline encryption if there aren't very many keyslots. 
Reviewed-by: Hannes Reinecke Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20220124215938.2769-4-ebiggers@kernel.org Signed-off-by: Jens Axboe --- Documentation/ABI/stable/sysfs-block | 49 ++++++++ block/Makefile | 3 +- block/blk-crypto-internal.h | 12 ++ block/blk-crypto-sysfs.c | 172 +++++++++++++++++++++++++++ block/blk-crypto.c | 3 + block/blk-sysfs.c | 6 + include/linux/blkdev.h | 1 + 7 files changed, 245 insertions(+), 1 deletion(-) create mode 100644 block/blk-crypto-sysfs.c diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block index 8dd3e84a8aade..e8797cd09aff9 100644 --- a/Documentation/ABI/stable/sysfs-block +++ b/Documentation/ABI/stable/sysfs-block @@ -155,6 +155,55 @@ Description: last zone of the device which may be smaller. +What: /sys/block//queue/crypto/ +Date: February 2022 +Contact: linux-block@vger.kernel.org +Description: + The presence of this subdirectory of /sys/block//queue/ + indicates that the device supports inline encryption. This + subdirectory contains files which describe the inline encryption + capabilities of the device. For more information about inline + encryption, refer to Documentation/block/inline-encryption.rst. + + +What: /sys/block//queue/crypto/max_dun_bits +Date: February 2022 +Contact: linux-block@vger.kernel.org +Description: + [RO] This file shows the maximum length, in bits, of data unit + numbers accepted by the device in inline encryption requests. + + +What: /sys/block//queue/crypto/modes/ +Date: February 2022 +Contact: linux-block@vger.kernel.org +Description: + [RO] For each crypto mode (i.e., encryption/decryption + algorithm) the device supports with inline encryption, a file + will exist at this location. It will contain a hexadecimal + number that is a bitmask of the supported data unit sizes, in + bytes, for that crypto mode. 
+ + Currently, the crypto modes that may be supported are: + + * AES-256-XTS + * AES-128-CBC-ESSIV + * Adiantum + + For example, if a device supports AES-256-XTS inline encryption + with data unit sizes of 512 and 4096 bytes, the file + /sys/block//queue/crypto/modes/AES-256-XTS will exist and + will contain "0x1200". + + +What: /sys/block//queue/crypto/num_keyslots +Date: February 2022 +Contact: linux-block@vger.kernel.org +Description: + [RO] This file shows the number of keyslots the device has for + use with inline encryption. + + What: /sys/block//queue/dax Date: June 2016 Contact: linux-block@vger.kernel.org diff --git a/block/Makefile b/block/Makefile index f38eaa6129296..3950ecbc5c263 100644 --- a/block/Makefile +++ b/block/Makefile @@ -36,6 +36,7 @@ obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o obj-$(CONFIG_BLK_DEBUG_FS_ZONED)+= blk-mq-debugfs-zoned.o obj-$(CONFIG_BLK_SED_OPAL) += sed-opal.o obj-$(CONFIG_BLK_PM) += blk-pm.o -obj-$(CONFIG_BLK_INLINE_ENCRYPTION) += blk-crypto.o blk-crypto-profile.o +obj-$(CONFIG_BLK_INLINE_ENCRYPTION) += blk-crypto.o blk-crypto-profile.o \ + blk-crypto-sysfs.o obj-$(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK) += blk-crypto-fallback.o obj-$(CONFIG_BLOCK_HOLDER_DEPRECATED) += holder.o diff --git a/block/blk-crypto-internal.h b/block/blk-crypto-internal.h index 2fb0d65a464ca..e6818ffaddbf8 100644 --- a/block/blk-crypto-internal.h +++ b/block/blk-crypto-internal.h @@ -11,6 +11,7 @@ /* Represents a crypto mode supported by blk-crypto */ struct blk_crypto_mode { + const char *name; /* name of this mode, shown in sysfs */ const char *cipher_str; /* crypto API name (for fallback case) */ unsigned int keysize; /* key size in bytes */ unsigned int ivsize; /* iv size in bytes */ @@ -20,6 +21,10 @@ extern const struct blk_crypto_mode blk_crypto_modes[]; #ifdef CONFIG_BLK_INLINE_ENCRYPTION +int blk_crypto_sysfs_register(struct request_queue *q); + +void blk_crypto_sysfs_unregister(struct request_queue *q); + void bio_crypt_dun_increment(u64 
dun[BLK_CRYPTO_DUN_ARRAY_SIZE], unsigned int inc); @@ -62,6 +67,13 @@ static inline bool blk_crypto_rq_is_encrypted(struct request *rq) #else /* CONFIG_BLK_INLINE_ENCRYPTION */ +static inline int blk_crypto_sysfs_register(struct request_queue *q) +{ + return 0; +} + +static inline void blk_crypto_sysfs_unregister(struct request_queue *q) { } + static inline bool bio_crypt_rq_ctx_compatible(struct request *rq, struct bio *bio) { diff --git a/block/blk-crypto-sysfs.c b/block/blk-crypto-sysfs.c new file mode 100644 index 0000000000000..fd93bd2f33b75 --- /dev/null +++ b/block/blk-crypto-sysfs.c @@ -0,0 +1,172 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2021 Google LLC + * + * sysfs support for blk-crypto. This file contains the code which exports the + * crypto capabilities of devices via /sys/block/$disk/queue/crypto/. + */ + +#include + +#include "blk-crypto-internal.h" + +struct blk_crypto_kobj { + struct kobject kobj; + struct blk_crypto_profile *profile; +}; + +struct blk_crypto_attr { + struct attribute attr; + ssize_t (*show)(struct blk_crypto_profile *profile, + struct blk_crypto_attr *attr, char *page); +}; + +static struct blk_crypto_profile *kobj_to_crypto_profile(struct kobject *kobj) +{ + return container_of(kobj, struct blk_crypto_kobj, kobj)->profile; +} + +static struct blk_crypto_attr *attr_to_crypto_attr(struct attribute *attr) +{ + return container_of(attr, struct blk_crypto_attr, attr); +} + +static ssize_t max_dun_bits_show(struct blk_crypto_profile *profile, + struct blk_crypto_attr *attr, char *page) +{ + return sysfs_emit(page, "%u\n", 8 * profile->max_dun_bytes_supported); +} + +static ssize_t num_keyslots_show(struct blk_crypto_profile *profile, + struct blk_crypto_attr *attr, char *page) +{ + return sysfs_emit(page, "%u\n", profile->num_slots); +} + +#define BLK_CRYPTO_RO_ATTR(_name) \ + static struct blk_crypto_attr _name##_attr = __ATTR_RO(_name) + +BLK_CRYPTO_RO_ATTR(max_dun_bits); +BLK_CRYPTO_RO_ATTR(num_keyslots); + +static 
struct attribute *blk_crypto_attrs[] = { + &max_dun_bits_attr.attr, + &num_keyslots_attr.attr, + NULL, +}; + +static const struct attribute_group blk_crypto_attr_group = { + .attrs = blk_crypto_attrs, +}; + +/* + * The encryption mode attributes. To avoid hard-coding the list of encryption + * modes, these are initialized at boot time by blk_crypto_sysfs_init(). + */ +static struct blk_crypto_attr __blk_crypto_mode_attrs[BLK_ENCRYPTION_MODE_MAX]; +static struct attribute *blk_crypto_mode_attrs[BLK_ENCRYPTION_MODE_MAX + 1]; + +static umode_t blk_crypto_mode_is_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + struct blk_crypto_profile *profile = kobj_to_crypto_profile(kobj); + struct blk_crypto_attr *a = attr_to_crypto_attr(attr); + int mode_num = a - __blk_crypto_mode_attrs; + + if (profile->modes_supported[mode_num]) + return 0444; + return 0; +} + +static ssize_t blk_crypto_mode_show(struct blk_crypto_profile *profile, + struct blk_crypto_attr *attr, char *page) +{ + int mode_num = attr - __blk_crypto_mode_attrs; + + return sysfs_emit(page, "0x%x\n", profile->modes_supported[mode_num]); +} + +static const struct attribute_group blk_crypto_modes_attr_group = { + .name = "modes", + .attrs = blk_crypto_mode_attrs, + .is_visible = blk_crypto_mode_is_visible, +}; + +static const struct attribute_group *blk_crypto_attr_groups[] = { + &blk_crypto_attr_group, + &blk_crypto_modes_attr_group, + NULL, +}; + +static ssize_t blk_crypto_attr_show(struct kobject *kobj, + struct attribute *attr, char *page) +{ + struct blk_crypto_profile *profile = kobj_to_crypto_profile(kobj); + struct blk_crypto_attr *a = attr_to_crypto_attr(attr); + + return a->show(profile, a, page); +} + +static const struct sysfs_ops blk_crypto_attr_ops = { + .show = blk_crypto_attr_show, +}; + +static void blk_crypto_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct blk_crypto_kobj, kobj)); +} + +static struct kobj_type blk_crypto_ktype = { + .default_groups = 
blk_crypto_attr_groups, + .sysfs_ops = &blk_crypto_attr_ops, + .release = blk_crypto_release, +}; + +/* + * If the request_queue has a blk_crypto_profile, create the "crypto" + * subdirectory in sysfs (/sys/block/$disk/queue/crypto/). + */ +int blk_crypto_sysfs_register(struct request_queue *q) +{ + struct blk_crypto_kobj *obj; + int err; + + if (!q->crypto_profile) + return 0; + + obj = kzalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) + return -ENOMEM; + obj->profile = q->crypto_profile; + + err = kobject_init_and_add(&obj->kobj, &blk_crypto_ktype, &q->kobj, + "crypto"); + if (err) { + kobject_put(&obj->kobj); + return err; + } + q->crypto_kobject = &obj->kobj; + return 0; +} + +void blk_crypto_sysfs_unregister(struct request_queue *q) +{ + kobject_put(q->crypto_kobject); +} + +static int __init blk_crypto_sysfs_init(void) +{ + int i; + + BUILD_BUG_ON(BLK_ENCRYPTION_MODE_INVALID != 0); + for (i = 1; i < BLK_ENCRYPTION_MODE_MAX; i++) { + struct blk_crypto_attr *attr = &__blk_crypto_mode_attrs[i]; + + attr->attr.name = blk_crypto_modes[i].name; + attr->attr.mode = 0444; + attr->show = blk_crypto_mode_show; + blk_crypto_mode_attrs[i - 1] = &attr->attr; + } + return 0; +} +subsys_initcall(blk_crypto_sysfs_init); diff --git a/block/blk-crypto.c b/block/blk-crypto.c index 773dae4c329ba..a496aaef85ba4 100644 --- a/block/blk-crypto.c +++ b/block/blk-crypto.c @@ -19,16 +19,19 @@ const struct blk_crypto_mode blk_crypto_modes[] = { [BLK_ENCRYPTION_MODE_AES_256_XTS] = { + .name = "AES-256-XTS", .cipher_str = "xts(aes)", .keysize = 64, .ivsize = 16, }, [BLK_ENCRYPTION_MODE_AES_128_CBC_ESSIV] = { + .name = "AES-128-CBC-ESSIV", .cipher_str = "essiv(cbc(aes),sha256)", .keysize = 16, .ivsize = 16, }, [BLK_ENCRYPTION_MODE_ADIANTUM] = { + .name = "Adiantum", .cipher_str = "adiantum(xchacha12,aes)", .keysize = 32, .ivsize = 32, diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index b38ef1bbf389c..241ded62f458f 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -880,6 +880,10 
@@ int blk_register_queue(struct gendisk *disk) goto put_dev; } + ret = blk_crypto_sysfs_register(q); + if (ret) + goto put_dev; + blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q); wbt_enable_default(q); blk_throtl_register_queue(q); @@ -910,6 +914,7 @@ unlock: return ret; put_dev: + elv_unregister_queue(q); disk_unregister_independent_access_ranges(disk); mutex_unlock(&q->sysfs_lock); mutex_unlock(&q->sysfs_dir_lock); @@ -954,6 +959,7 @@ void blk_unregister_queue(struct gendisk *disk) */ if (queue_is_mq(q)) blk_mq_unregister_dev(disk_to_dev(disk), q); + blk_crypto_sysfs_unregister(q); blk_trace_remove_sysfs(disk_to_dev(disk)); mutex_lock(&q->sysfs_lock); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f757f9c2871f8..e19947d84f128 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -413,6 +413,7 @@ struct request_queue { #ifdef CONFIG_BLK_INLINE_ENCRYPTION struct blk_crypto_profile *crypto_profile; + struct kobject *crypto_kobject; #endif unsigned int rq_timeout; -- GitLab From 25d490eb46486e88c16e64d9eb7cfd33a642d596 Mon Sep 17 00:00:00 2001 From: Wang Kefeng Date: Sat, 18 Dec 2021 09:30:40 +0100 Subject: [PATCH 0827/1586] ARM: 9172/1: amba: Cleanup amba pclk operation There is no user about amba_pclk_[un]prepare() besides pl330.c, directly use clk_[un]prepare(). After this, all the function about amba pclk operation, enable, disable, [un]prepare could be killed. 
Acked-by: Vinod Koul Signed-off-by: Kefeng Wang Signed-off-by: Russell King (Oracle) --- drivers/dma/pl330.c | 4 ++-- include/linux/amba/bus.h | 20 -------------------- 2 files changed, 2 insertions(+), 22 deletions(-) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 110de8a600588..858400e42ec05 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2968,7 +2968,7 @@ static int __maybe_unused pl330_suspend(struct device *dev) struct amba_device *pcdev = to_amba_device(dev); pm_runtime_force_suspend(dev); - amba_pclk_unprepare(pcdev); + clk_unprepare(pcdev->pclk); return 0; } @@ -2978,7 +2978,7 @@ static int __maybe_unused pl330_resume(struct device *dev) struct amba_device *pcdev = to_amba_device(dev); int ret; - ret = amba_pclk_prepare(pcdev); + ret = clk_prepare(pcdev->pclk); if (ret) return ret; diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 6c7f478469715..09174970b8555 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -121,26 +121,6 @@ struct amba_device *amba_find_device(const char *, struct device *, unsigned int int amba_request_regions(struct amba_device *, const char *); void amba_release_regions(struct amba_device *); -static inline int amba_pclk_enable(struct amba_device *dev) -{ - return clk_enable(dev->pclk); -} - -static inline void amba_pclk_disable(struct amba_device *dev) -{ - clk_disable(dev->pclk); -} - -static inline int amba_pclk_prepare(struct amba_device *dev) -{ - return clk_prepare(dev->pclk); -} - -static inline void amba_pclk_unprepare(struct amba_device *dev) -{ - clk_unprepare(dev->pclk); -} - /* Some drivers don't use the struct amba_device */ #define AMBA_CONFIG_BITS(a) (((a) >> 24) & 0xff) #define AMBA_REV_BITS(a) (((a) >> 20) & 0x0f) -- GitLab From dacf3ca134d0dc105caee77651a349a86bd77456 Mon Sep 17 00:00:00 2001 From: Wang Kefeng Date: Sat, 18 Dec 2021 09:30:41 +0100 Subject: [PATCH 0828/1586] ARM: 9173/1: amba: kill amba_find_match() There is no one use 
amba_find_match(), kill it. Signed-off-by: Kefeng Wang Signed-off-by: Russell King (Oracle) --- drivers/amba/bus.c | 61 ---------------------------------------- include/linux/amba/bus.h | 1 - 2 files changed, 62 deletions(-) diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c index e1a5eca3ae3cc..dd0ef65e5c3a5 100644 --- a/drivers/amba/bus.c +++ b/drivers/amba/bus.c @@ -669,66 +669,6 @@ void amba_device_unregister(struct amba_device *dev) device_unregister(&dev->dev); } - -struct find_data { - struct amba_device *dev; - struct device *parent; - const char *busid; - unsigned int id; - unsigned int mask; -}; - -static int amba_find_match(struct device *dev, void *data) -{ - struct find_data *d = data; - struct amba_device *pcdev = to_amba_device(dev); - int r; - - r = (pcdev->periphid & d->mask) == d->id; - if (d->parent) - r &= d->parent == dev->parent; - if (d->busid) - r &= strcmp(dev_name(dev), d->busid) == 0; - - if (r) { - get_device(dev); - d->dev = pcdev; - } - - return r; -} - -/** - * amba_find_device - locate an AMBA device given a bus id - * @busid: bus id for device (or NULL) - * @parent: parent device (or NULL) - * @id: peripheral ID (or 0) - * @mask: peripheral ID mask (or 0) - * - * Return the AMBA device corresponding to the supplied parameters. - * If no device matches, returns NULL. - * - * NOTE: When a valid device is found, its refcount is - * incremented, and must be decremented before the returned - * reference. 
- */ -struct amba_device * -amba_find_device(const char *busid, struct device *parent, unsigned int id, - unsigned int mask) -{ - struct find_data data; - - data.dev = NULL; - data.parent = parent; - data.busid = busid; - data.id = id; - data.mask = mask; - - bus_for_each_dev(&amba_bustype, NULL, &data, amba_find_match); - - return data.dev; -} - /** * amba_request_regions - request all mem regions associated with device * @dev: amba_device structure for device @@ -768,6 +708,5 @@ EXPORT_SYMBOL(amba_driver_register); EXPORT_SYMBOL(amba_driver_unregister); EXPORT_SYMBOL(amba_device_register); EXPORT_SYMBOL(amba_device_unregister); -EXPORT_SYMBOL(amba_find_device); EXPORT_SYMBOL(amba_request_regions); EXPORT_SYMBOL(amba_release_regions); diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 09174970b8555..6562f543c3e04 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -117,7 +117,6 @@ void amba_device_put(struct amba_device *); int amba_device_add(struct amba_device *, struct resource *); int amba_device_register(struct amba_device *, struct resource *); void amba_device_unregister(struct amba_device *); -struct amba_device *amba_find_device(const char *, struct device *, unsigned int, unsigned int); int amba_request_regions(struct amba_device *, const char *); void amba_release_regions(struct amba_device *); -- GitLab From a2e7ae86a6eb1e93401214802c4d53872e310d4c Mon Sep 17 00:00:00 2001 From: Wang Kefeng Date: Sat, 18 Dec 2021 09:30:42 +0100 Subject: [PATCH 0829/1586] ARM: 9174/1: amba: Move EXPORT_SYMBOL() closer to definition Some EXPORT_SYMBOL() is at the end of the function, but some is at the end of file. For reader sanity and be consistent, move all EXPORT_SYMBOL() declarations just after the end of the function. 
Signed-off-by: Kefeng Wang Signed-off-by: Russell King (Oracle) --- drivers/amba/bus.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c index dd0ef65e5c3a5..d3bd14aaabf6e 100644 --- a/drivers/amba/bus.c +++ b/drivers/amba/bus.c @@ -370,6 +370,7 @@ int amba_driver_register(struct amba_driver *drv) return driver_register(&drv->drv); } +EXPORT_SYMBOL(amba_driver_register); /** * amba_driver_unregister - remove an AMBA device driver @@ -383,7 +384,7 @@ void amba_driver_unregister(struct amba_driver *drv) { driver_unregister(&drv->drv); } - +EXPORT_SYMBOL(amba_driver_unregister); static void amba_device_release(struct device *dev) { @@ -642,6 +643,7 @@ int amba_device_register(struct amba_device *dev, struct resource *parent) return amba_device_add(dev, parent); } +EXPORT_SYMBOL(amba_device_register); /** * amba_device_put - put an AMBA device @@ -668,6 +670,7 @@ void amba_device_unregister(struct amba_device *dev) { device_unregister(&dev->dev); } +EXPORT_SYMBOL(amba_device_unregister); /** * amba_request_regions - request all mem regions associated with device @@ -689,6 +692,7 @@ int amba_request_regions(struct amba_device *dev, const char *name) return ret; } +EXPORT_SYMBOL(amba_request_regions); /** * amba_release_regions - release mem regions associated with device @@ -703,10 +707,4 @@ void amba_release_regions(struct amba_device *dev) size = resource_size(&dev->res); release_mem_region(dev->res.start, size); } - -EXPORT_SYMBOL(amba_driver_register); -EXPORT_SYMBOL(amba_driver_unregister); -EXPORT_SYMBOL(amba_device_register); -EXPORT_SYMBOL(amba_device_unregister); -EXPORT_SYMBOL(amba_request_regions); EXPORT_SYMBOL(amba_release_regions); -- GitLab From e46e45f00d9ea54edc590ee056212295635c9a6e Mon Sep 17 00:00:00 2001 From: Wang Kefeng Date: Wed, 22 Dec 2021 14:18:17 +0100 Subject: [PATCH 0830/1586] ARM: 9175/1: Convert to reserve_initrd_mem() Convert to the generic reserve_initrd_mem() function.
Signed-off-by: Kefeng Wang Signed-off-by: Russell King (Oracle) --- arch/arm/mm/init.c | 43 +------------------------------------------ 1 file changed, 1 insertion(+), 42 deletions(-) diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 6d0cb0f7bc54b..fe249ea919083 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -164,47 +164,6 @@ phys_addr_t __init arm_memblock_steal(phys_addr_t size, phys_addr_t align) return phys; } -static void __init arm_initrd_init(void) -{ -#ifdef CONFIG_BLK_DEV_INITRD - phys_addr_t start; - unsigned long size; - - initrd_start = initrd_end = 0; - - if (!phys_initrd_size) - return; - - /* - * Round the memory region to page boundaries as per free_initrd_mem() - * This allows us to detect whether the pages overlapping the initrd - * are in use, but more importantly, reserves the entire set of pages - * as we don't want these pages allocated for other purposes. - */ - start = round_down(phys_initrd_start, PAGE_SIZE); - size = phys_initrd_size + (phys_initrd_start - start); - size = round_up(size, PAGE_SIZE); - - if (!memblock_is_region_memory(start, size)) { - pr_err("INITRD: 0x%08llx+0x%08lx is not a memory region - disabling initrd\n", - (u64)start, size); - return; - } - - if (memblock_is_region_reserved(start, size)) { - pr_err("INITRD: 0x%08llx+0x%08lx overlaps in-use memory region - disabling initrd\n", - (u64)start, size); - return; - } - - memblock_reserve(start, size); - - /* Now convert initrd to virtual addresses */ - initrd_start = __phys_to_virt(phys_initrd_start); - initrd_end = initrd_start + phys_initrd_size; -#endif -} - #ifdef CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND void check_cpu_icache_size(int cpuid) { @@ -226,7 +185,7 @@ void __init arm_memblock_init(const struct machine_desc *mdesc) /* Register the kernel text, kernel data and initrd with memblock. 
*/ memblock_reserve(__pa(KERNEL_START), KERNEL_END - KERNEL_START); - arm_initrd_init(); + reserve_initrd_mem(); arm_mm_memblock_reserve(); -- GitLab From 9bc19d473014f5b79ae6d4f99150c535781eb19f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 20 Jan 2022 06:44:18 +0100 Subject: [PATCH 0831/1586] ARM: 9181/1: vdso: remove -nostdlib compiler flag The -nostdlib option requests the compiler to not use the standard system startup files or libraries when linking. It is effective only when $(CC) is used as a linker driver. Since commit fe00e50b2db8 ("ARM: 8858/1: vdso: use $(LD) instead of $(CC) to link VDSO"), $(LD) is directly used, hence -nostdlib is unneeded. Signed-off-by: Masahiro Yamada Reviewed-by: Nick Desaulniers Signed-off-by: Russell King (Oracle) --- arch/arm/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile index 7c9e395b77f7c..ec52b776f9267 100644 --- a/arch/arm/vdso/Makefile +++ b/arch/arm/vdso/Makefile @@ -18,7 +18,7 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO32 ldflags-$(CONFIG_CPU_ENDIAN_BE8) := --be8 ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \ - -z max-page-size=4096 -nostdlib -shared $(ldflags-y) \ + -z max-page-size=4096 -shared $(ldflags-y) \ --hash-style=sysv --build-id=sha1 \ -T -- GitLab From 64276a9939ff414f2f0db38036cf4e1a0a703394 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 24 Feb 2022 23:04:56 +0100 Subject: [PATCH 0832/1586] random: cleanup UUID handling Rather than hard coding various lengths, we can use the right constants. Strings should be `char *` while buffers should be `u8 *`. Rather than have a nonsensical and unused maxlength, just remove it. Finally, use snprintf instead of sprintf, just out of good hygiene. As well, remove the old comment about returning a binary UUID via the binary sysctl syscall. 
That syscall was removed from the kernel in 5.5, and actually, the "uuid_strategy" function and related infrastructure for even serving it via the binary sysctl syscall was removed with 894d2491153a ("sysctl drivers: Remove dead binary sysctl support") back in 2.6.33. Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 536237a0f073b..4cdf39567bae2 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1661,22 +1661,25 @@ const struct file_operations urandom_fops = { static int sysctl_random_min_urandom_seed = 60; static int sysctl_random_write_wakeup_bits = POOL_MIN_BITS; static int sysctl_poolsize = POOL_BITS; -static char sysctl_bootid[16]; +static u8 sysctl_bootid[UUID_SIZE]; /* * This function is used to return both the bootid UUID, and random - * UUID. The difference is in whether table->data is NULL; if it is, + * UUID. The difference is in whether table->data is NULL; if it is, * then a new UUID is generated and returned to the user. - * - * If the user accesses this via the proc interface, the UUID will be - * returned as an ASCII string in the standard UUID format; if via the - * sysctl system call, as 16 bytes of binary data. 
*/ static int proc_do_uuid(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { - struct ctl_table fake_table; - unsigned char buf[64], tmp_uuid[16], *uuid; + u8 tmp_uuid[UUID_SIZE], *uuid; + char uuid_string[UUID_STRING_LEN + 1]; + struct ctl_table fake_table = { + .data = uuid_string, + .maxlen = UUID_STRING_LEN + }; + + if (write) + return -EPERM; uuid = table->data; if (!uuid) { @@ -1691,12 +1694,8 @@ static int proc_do_uuid(struct ctl_table *table, int write, void *buffer, spin_unlock(&bootid_spinlock); } - sprintf(buf, "%pU", uuid); - - fake_table.data = buf; - fake_table.maxlen = sizeof(buf); - - return proc_dostring(&fake_table, write, buffer, lenp, ppos); + snprintf(uuid_string, sizeof(uuid_string), "%pU", uuid); + return proc_dostring(&fake_table, 0, buffer, lenp, ppos); } static struct ctl_table random_table[] = { @@ -1731,13 +1730,11 @@ static struct ctl_table random_table[] = { { .procname = "boot_id", .data = &sysctl_bootid, - .maxlen = 16, .mode = 0444, .proc_handler = proc_do_uuid, }, { .procname = "uuid", - .maxlen = 16, .mode = 0444, .proc_handler = proc_do_uuid, }, -- GitLab From abded93ec1e9692920fe309f07f40bd1035f2940 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 24 Feb 2022 18:30:58 +0100 Subject: [PATCH 0833/1586] random: unify cycles_t and jiffies usage and types random_get_entropy() returns a cycles_t, not an unsigned long, which is sometimes 64 bits on various 32-bit platforms, including x86. Conversely, jiffies is always unsigned long. This commit fixes things to use cycles_t for fields that use random_get_entropy(), named "cycles", and unsigned long for fields that use jiffies, named "now". It's also good to mix in a cycles_t and a jiffies in the same way for both add_device_randomness and add_timer_randomness, rather than using xor in one case. 
Finally, we unify the order of these volatile reads, always reading the more precise cycles counter, and then jiffies, so that the cycle counter is as close to the event as possible. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 56 +++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 4cdf39567bae2..3b83a6ac9f596 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1020,12 +1020,6 @@ int __init rand_initialize(void) return 0; } -/* There is one of these per entropy source */ -struct timer_rand_state { - cycles_t last_time; - long last_delta, last_delta2; -}; - /* * Add device- or boot-specific data to the input pool to help * initialize it. @@ -1036,19 +1030,26 @@ struct timer_rand_state { */ void add_device_randomness(const void *buf, size_t size) { - unsigned long time = random_get_entropy() ^ jiffies; - unsigned long flags; + cycles_t cycles = random_get_entropy(); + unsigned long flags, now = jiffies; if (crng_init == 0 && size) crng_pre_init_inject(buf, size, false, false); spin_lock_irqsave(&input_pool.lock, flags); + _mix_pool_bytes(&cycles, sizeof(cycles)); + _mix_pool_bytes(&now, sizeof(now)); _mix_pool_bytes(buf, size); - _mix_pool_bytes(&time, sizeof(time)); spin_unlock_irqrestore(&input_pool.lock, flags); } EXPORT_SYMBOL(add_device_randomness); +/* There is one of these per entropy source */ +struct timer_rand_state { + unsigned long last_time; + long last_delta, last_delta2; +}; + /* * This function adds entropy to the entropy "pool" by using timing * delays. It uses the timer_rand_state structure to make an estimate @@ -1057,29 +1058,26 @@ EXPORT_SYMBOL(add_device_randomness); * The number "num" is also added to the pool - it should somehow describe * the type of event which just happened. This is currently 0-255 for * keyboard scan codes, and 256 upwards for interrupts. 
- * */ static void add_timer_randomness(struct timer_rand_state *state, unsigned int num) { - struct { - long jiffies; - unsigned int cycles; - unsigned int num; - } sample; + cycles_t cycles = random_get_entropy(); + unsigned long flags, now = jiffies; long delta, delta2, delta3; - sample.jiffies = jiffies; - sample.cycles = random_get_entropy(); - sample.num = num; - mix_pool_bytes(&sample, sizeof(sample)); + spin_lock_irqsave(&input_pool.lock, flags); + _mix_pool_bytes(&cycles, sizeof(cycles)); + _mix_pool_bytes(&now, sizeof(now)); + _mix_pool_bytes(&num, sizeof(num)); + spin_unlock_irqrestore(&input_pool.lock, flags); /* * Calculate number of bits of randomness we probably added. * We take into account the first, second and third-order deltas * in order to make our estimate. */ - delta = sample.jiffies - READ_ONCE(state->last_time); - WRITE_ONCE(state->last_time, sample.jiffies); + delta = now - READ_ONCE(state->last_time); + WRITE_ONCE(state->last_time, now); delta2 = delta - READ_ONCE(state->last_delta); WRITE_ONCE(state->last_delta, delta); @@ -1305,10 +1303,10 @@ static void mix_interrupt_randomness(struct work_struct *work) void add_interrupt_randomness(int irq) { enum { MIX_INFLIGHT = 1U << 31 }; + cycles_t cycles = random_get_entropy(); + unsigned long now = jiffies; struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); struct pt_regs *regs = get_irq_regs(); - unsigned long now = jiffies; - cycles_t cycles = random_get_entropy(); unsigned int new_count; if (cycles == 0) @@ -1383,28 +1381,28 @@ static void entropy_timer(struct timer_list *t) static void try_to_generate_entropy(void) { struct { - unsigned long now; + cycles_t cycles; struct timer_list timer; } stack; - stack.now = random_get_entropy(); + stack.cycles = random_get_entropy(); /* Slow counter - or none. 
Don't even bother */ - if (stack.now == random_get_entropy()) + if (stack.cycles == random_get_entropy()) return; timer_setup_on_stack(&stack.timer, entropy_timer, 0); while (!crng_ready()) { if (!timer_pending(&stack.timer)) mod_timer(&stack.timer, jiffies + 1); - mix_pool_bytes(&stack.now, sizeof(stack.now)); + mix_pool_bytes(&stack.cycles, sizeof(stack.cycles)); schedule(); - stack.now = random_get_entropy(); + stack.cycles = random_get_entropy(); } del_timer_sync(&stack.timer); destroy_timer_on_stack(&stack.timer); - mix_pool_bytes(&stack.now, sizeof(stack.now)); + mix_pool_bytes(&stack.cycles, sizeof(stack.cycles)); } -- GitLab From c2a7de4feb6e09f23af7accc0f882a8fa92e7ae5 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 13 Feb 2022 18:25:07 +0100 Subject: [PATCH 0834/1586] random: do crng pre-init loading in worker rather than irq Taking spinlocks from IRQ context is generally problematic for PREEMPT_RT. That is, in part, why we take trylocks instead. However, a spin_try_lock() is also problematic since another spin_lock() invocation can potentially PI-boost the wrong task, as the spin_try_lock() is invoked from an IRQ-context, so the task on CPU (random task or idle) is not the actual owner. Additionally, by deferring the crng pre-init loading to the worker, we can use the cryptographic hash function rather than xor, which is perhaps a meaningful difference when considering this data has only been through the relatively weak fast_mix() function. The biggest downside of this approach is that the pre-init loading is now deferred until later, which means things that need random numbers after interrupts are enabled, but before workqueues are running -- or before this particular worker manages to run -- are going to get into trouble. Hopefully in the real world, this window is rather small, especially since this code won't run until 64 interrupts had occurred. 
Cc: Sultan Alsawaf Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Eric Biggers Cc: Theodore Ts'o Acked-by: Sebastian Andrzej Siewior Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 65 +++++++++++++------------------------------ 1 file changed, 19 insertions(+), 46 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 3b83a6ac9f596..8171c3bbf460f 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -443,10 +443,6 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS], * boot time when it's better to have something there rather than * nothing. * - * There are two paths, a slow one and a fast one. The slow one - * hashes the input along with the current key. The fast one simply - * xors it in, and should only be used from interrupt context. - * * If account is set, then the crng_init_cnt counter is incremented. * This shouldn't be set by functions like add_device_randomness(), * where we can't trust the buffer passed to it is guaranteed to be @@ -455,19 +451,15 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS], * Returns the number of bytes processed from input, which is bounded * by CRNG_INIT_CNT_THRESH if account is true. 
*/ -static size_t crng_pre_init_inject(const void *input, size_t len, - bool fast, bool account) +static size_t crng_pre_init_inject(const void *input, size_t len, bool account) { static int crng_init_cnt = 0; + struct blake2s_state hash; unsigned long flags; - if (fast) { - if (!spin_trylock_irqsave(&base_crng.lock, flags)) - return 0; - } else { - spin_lock_irqsave(&base_crng.lock, flags); - } + blake2s_init(&hash, sizeof(base_crng.key)); + spin_lock_irqsave(&base_crng.lock, flags); if (crng_init != 0) { spin_unlock_irqrestore(&base_crng.lock, flags); return 0; @@ -476,21 +468,9 @@ static size_t crng_pre_init_inject(const void *input, size_t len, if (account) len = min_t(size_t, len, CRNG_INIT_CNT_THRESH - crng_init_cnt); - if (fast) { - const u8 *src = input; - size_t i; - - for (i = 0; i < len; ++i) - base_crng.key[(crng_init_cnt + i) % - sizeof(base_crng.key)] ^= src[i]; - } else { - struct blake2s_state hash; - - blake2s_init(&hash, sizeof(base_crng.key)); - blake2s_update(&hash, base_crng.key, sizeof(base_crng.key)); - blake2s_update(&hash, input, len); - blake2s_final(&hash, base_crng.key); - } + blake2s_update(&hash, base_crng.key, sizeof(base_crng.key)); + blake2s_update(&hash, input, len); + blake2s_final(&hash, base_crng.key); if (account) { crng_init_cnt += len; @@ -1034,7 +1014,7 @@ void add_device_randomness(const void *buf, size_t size) unsigned long flags, now = jiffies; if (crng_init == 0 && size) - crng_pre_init_inject(buf, size, false, false); + crng_pre_init_inject(buf, size, false); spin_lock_irqsave(&input_pool.lock, flags); _mix_pool_bytes(&cycles, sizeof(cycles)); @@ -1155,7 +1135,7 @@ void add_hwgenerator_randomness(const void *buffer, size_t count, size_t entropy) { if (unlikely(crng_init == 0)) { - size_t ret = crng_pre_init_inject(buffer, count, false, true); + size_t ret = crng_pre_init_inject(buffer, count, true); mix_pool_bytes(buffer, ret); count -= ret; buffer += ret; @@ -1295,8 +1275,14 @@ static void 
mix_interrupt_randomness(struct work_struct *work) fast_pool->last = jiffies; local_irq_enable(); - mix_pool_bytes(pool, sizeof(pool)); - credit_entropy_bits(1); + if (unlikely(crng_init == 0)) { + crng_pre_init_inject(pool, sizeof(pool), true); + mix_pool_bytes(pool, sizeof(pool)); + } else { + mix_pool_bytes(pool, sizeof(pool)); + credit_entropy_bits(1); + } + memzero_explicit(pool, sizeof(pool)); } @@ -1329,24 +1315,11 @@ void add_interrupt_randomness(int irq) fast_mix(fast_pool->pool32); new_count = ++fast_pool->count; - if (unlikely(crng_init == 0)) { - if (new_count >= 64 && - crng_pre_init_inject(fast_pool->pool32, sizeof(fast_pool->pool32), - true, true) > 0) { - fast_pool->count = 0; - fast_pool->last = now; - if (spin_trylock(&input_pool.lock)) { - _mix_pool_bytes(&fast_pool->pool32, sizeof(fast_pool->pool32)); - spin_unlock(&input_pool.lock); - } - } - return; - } - if (new_count & MIX_INFLIGHT) return; - if (new_count < 64 && !time_after(now, fast_pool->last + HZ)) + if (new_count < 64 && (!time_after(now, fast_pool->last + HZ) || + unlikely(crng_init == 0))) return; if (unlikely(!fast_pool->mix.func)) -- GitLab From f16ed63e53c79070283d3c264de5309794272ae9 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Sun, 27 Feb 2022 22:59:56 +0000 Subject: [PATCH 0835/1586] spi: dt-bindings: renesas,rspi: Document RZ/V2L SoC Add RSPI binding documentation for Renesas RZ/V2L SoC. RSPI block is identical to one found on RZ/A, so no driver changes are required. The fallback compatible string "renesas,rspi-rz" will be used on RZ/V2L. 
Signed-off-by: Lad Prabhakar Reviewed-by: Biju Das Acked-by: Krzysztof Kozlowski Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20220227225956.29570-1-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/spi/renesas,rspi.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/spi/renesas,rspi.yaml b/Documentation/devicetree/bindings/spi/renesas,rspi.yaml index 76e6d9e52fc72..a902f0ca21983 100644 --- a/Documentation/devicetree/bindings/spi/renesas,rspi.yaml +++ b/Documentation/devicetree/bindings/spi/renesas,rspi.yaml @@ -22,6 +22,7 @@ properties: - renesas,rspi-r7s72100 # RZ/A1H - renesas,rspi-r7s9210 # RZ/A2 - renesas,r9a07g044-rspi # RZ/G2{L,LC} + - renesas,r9a07g054-rspi # RZ/V2L - const: renesas,rspi-rz # RZ/A and RZ/G2{L,LC} - items: @@ -124,6 +125,7 @@ allOf: enum: - renesas,qspi - renesas,r9a07g044-rspi + - renesas,r9a07g054-rspi then: required: - resets -- GitLab From e377a3e698fb56cb63f6bddbebe7da76dc37e316 Mon Sep 17 00:00:00 2001 From: Ondrej Valousek Date: Tue, 11 Jan 2022 13:08:42 +0100 Subject: [PATCH 0836/1586] nfsd: Add support for the birth time attribute For filesystems that support the "btime" timestamp (i.e. most modern filesystems), we share it via kernel nfsd. Btime support for NFS client has already been added by Trond recently.
Suggested-by: Bruce Fields Signed-off-by: Ondrej Valousek [ cel: addressed some whitespace/checkpatch nits ] Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 10 ++++++++++ fs/nfsd/nfsd.h | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 714a3a3bd50c3..da92e7d2ab6a1 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2854,6 +2854,9 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, err = vfs_getattr(&path, &stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); if (err) goto out_nfserr; + if (!(stat.result_mask & STATX_BTIME)) + /* underlying FS does not offer btime so we can't share it */ + bmval1 &= ~FATTR4_WORD1_TIME_CREATE; if ((bmval0 & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_MAXNAME)) || (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | @@ -3254,6 +3257,13 @@ out_acl: p = xdr_encode_hyper(p, (s64)stat.mtime.tv_sec); *p++ = cpu_to_be32(stat.mtime.tv_nsec); } + if (bmval1 & FATTR4_WORD1_TIME_CREATE) { + p = xdr_reserve_space(xdr, 12); + if (!p) + goto out_resource; + p = xdr_encode_hyper(p, (s64)stat.btime.tv_sec); + *p++ = cpu_to_be32(stat.btime.tv_nsec); + } if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { struct kstat parent_stat; u64 ino = stat.ino; diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 3e5008b475ff0..4fc1fd639527a 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -364,7 +364,7 @@ void nfsd_lockd_shutdown(void); | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP | FATTR4_WORD1_RAWDEV \ | FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | FATTR4_WORD1_SPACE_TOTAL \ | FATTR4_WORD1_SPACE_USED | FATTR4_WORD1_TIME_ACCESS | FATTR4_WORD1_TIME_ACCESS_SET \ - | FATTR4_WORD1_TIME_DELTA | FATTR4_WORD1_TIME_METADATA \ + | FATTR4_WORD1_TIME_DELTA | FATTR4_WORD1_TIME_METADATA | FATTR4_WORD1_TIME_CREATE \ | FATTR4_WORD1_TIME_MODIFY | FATTR4_WORD1_TIME_MODIFY_SET | FATTR4_WORD1_MOUNTED_ON_FILEID) #define 
NFSD4_SUPPORTED_ATTRS_WORD2 0 -- GitLab From 378a6109dd142a678f629b740f558365150f60f9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 30 Sep 2021 19:19:57 -0400 Subject: [PATCH 0837/1586] NFSD: De-duplicate hash bucket indexing Clean up: The details of finding the right hash bucket are exactly the same in both nfsd_cache_lookup() and nfsd_cache_update(). Signed-off-by: Chuck Lever --- fs/nfsd/nfscache.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index a4a69ab6ab280..f79790d367288 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -84,12 +84,6 @@ nfsd_hashsize(unsigned int limit) return roundup_pow_of_two(limit / TARGET_BUCKET_SIZE); } -static u32 -nfsd_cache_hash(__be32 xid, struct nfsd_net *nn) -{ - return hash_32((__force u32)xid, nn->maskbits); -} - static struct svc_cacherep * nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum, struct nfsd_net *nn) @@ -241,6 +235,14 @@ lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp) list_move_tail(&rp->c_lru, &b->lru_head); } +static noinline struct nfsd_drc_bucket * +nfsd_cache_bucket_find(__be32 xid, struct nfsd_net *nn) +{ + unsigned int hash = hash_32((__force u32)xid, nn->maskbits); + + return &nn->drc_hashtbl[hash]; +} + static long prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn, unsigned int max) { @@ -421,10 +423,8 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp) { struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct svc_cacherep *rp, *found; - __be32 xid = rqstp->rq_xid; __wsum csum; - u32 hash = nfsd_cache_hash(xid, nn); - struct nfsd_drc_bucket *b = &nn->drc_hashtbl[hash]; + struct nfsd_drc_bucket *b = nfsd_cache_bucket_find(rqstp->rq_xid, nn); int type = rqstp->rq_cachetype; int rtn = RC_DOIT; @@ -528,7 +528,6 @@ void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct 
svc_cacherep *rp = rqstp->rq_cacherep; struct kvec *resv = &rqstp->rq_res.head[0], *cachv; - u32 hash; struct nfsd_drc_bucket *b; int len; size_t bufsize = 0; @@ -536,8 +535,7 @@ void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) if (!rp) return; - hash = nfsd_cache_hash(rp->c_key.k_xid, nn); - b = &nn->drc_hashtbl[hash]; + b = nfsd_cache_bucket_find(rp->c_key.k_xid, nn); len = resv->iov_len - ((char*)statp - (char*)resv->iov_base); len >>= 2; -- GitLab From 0f29ce32fbc56cfdb304eec8a4deb920ccfd89c3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 28 Sep 2021 11:39:02 -0400 Subject: [PATCH 0838/1586] NFSD: Skip extra computation for RC_NOCACHE case Force the compiler to skip unneeded initialization for cases that don't need those values. For example, NFSv4 COMPOUND operations are RC_NOCACHE. Signed-off-by: Chuck Lever --- fs/nfsd/nfscache.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index f79790d367288..34087a7e4f93c 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -421,10 +421,10 @@ out: */ int nfsd_cache_lookup(struct svc_rqst *rqstp) { - struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + struct nfsd_net *nn; struct svc_cacherep *rp, *found; __wsum csum; - struct nfsd_drc_bucket *b = nfsd_cache_bucket_find(rqstp->rq_xid, nn); + struct nfsd_drc_bucket *b; int type = rqstp->rq_cachetype; int rtn = RC_DOIT; @@ -440,10 +440,12 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp) * Since the common case is a cache miss followed by an insert, * preallocate an entry. 
*/ + nn = net_generic(SVC_NET(rqstp), nfsd_net_id); rp = nfsd_reply_cache_alloc(rqstp, csum, nn); if (!rp) goto out; + b = nfsd_cache_bucket_find(rqstp->rq_xid, nn); spin_lock(&b->cache_lock); found = nfsd_cache_insert(b, rp, nn); if (found != rp) { -- GitLab From add1511c38166cf1036765f8c4aa939f0275a799 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 28 Sep 2021 11:40:59 -0400 Subject: [PATCH 0839/1586] NFSD: Streamline the rare "found" case Move a rarely called function call site out of the hot path. This is an exceptionally small improvement because the compiler inlines most of the functions that nfsd_cache_lookup() calls. Signed-off-by: Chuck Lever --- fs/nfsd/nfscache.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 34087a7e4f93c..0b3f12aa37ff5 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -448,11 +448,8 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp) b = nfsd_cache_bucket_find(rqstp->rq_xid, nn); spin_lock(&b->cache_lock); found = nfsd_cache_insert(b, rp, nn); - if (found != rp) { - nfsd_reply_cache_free_locked(NULL, rp, nn); - rp = found; + if (found != rp) goto found_entry; - } nfsd_stats_rc_misses_inc(); rqstp->rq_cacherep = rp; @@ -470,8 +467,10 @@ out: found_entry: /* We found a matching entry which is either in progress or done. */ + nfsd_reply_cache_free_locked(NULL, rp, nn); nfsd_stats_rc_hits_inc(); rtn = RC_DROPIT; + rp = found; /* Request being processed */ if (rp->c_state == RC_INPROG) -- GitLab From d07c9ad622474616e94572e59e725c2c4a494fb4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 11 Jan 2022 12:43:55 -0500 Subject: [PATCH 0840/1586] tracing: Introduce helpers to safely handle dynamic-sized sockaddrs Enable a struct sockaddr to be stored in a trace record as a dynamically-sized field. The common cases are AF_INET and AF_INET6 which are different sizes, and are vastly smaller than a struct sockaddr_storage. 
These are safer because, when used properly, the size of the sockaddr destination field in each trace record is now guaranteed to be the same as the source address that is being copied into it. Link: https://lore.kernel.org/all/164182978641.8391.8277203495236105391.stgit@bazille.1015granger.net/ Signed-off-by: Chuck Lever --- include/trace/bpf_probe.h | 6 ++++ include/trace/perf.h | 6 ++++ include/trace/trace_events.h | 55 ++++++++++++++++++++++++++++++++++-- 3 files changed, 65 insertions(+), 2 deletions(-) diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h index 7660a7846586c..6a13220d2d27b 100644 --- a/include/trace/bpf_probe.h +++ b/include/trace/bpf_probe.h @@ -21,6 +21,9 @@ #undef __get_bitmask #define __get_bitmask(field) (char *)__get_dynamic_array(field) +#undef __get_sockaddr +#define __get_sockaddr(field) ((struct sockaddr *)__get_dynamic_array(field)) + #undef __get_rel_dynamic_array #define __get_rel_dynamic_array(field) \ ((void *)(&__entry->__rel_loc_##field) + \ @@ -37,6 +40,9 @@ #undef __get_rel_bitmask #define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field) +#undef __get_rel_sockaddr +#define __get_rel_sockaddr(field) ((struct sockaddr *)__get_rel_dynamic_array(field)) + #undef __perf_count #define __perf_count(c) (c) diff --git a/include/trace/perf.h b/include/trace/perf.h index 5d48c46a30083..5800d13146c3d 100644 --- a/include/trace/perf.h +++ b/include/trace/perf.h @@ -21,6 +21,9 @@ #undef __get_bitmask #define __get_bitmask(field) (char *)__get_dynamic_array(field) +#undef __get_sockaddr +#define __get_sockaddr(field) ((struct sockaddr *)__get_dynamic_array(field)) + #undef __get_rel_dynamic_array #define __get_rel_dynamic_array(field) \ ((void *)__entry + \ @@ -38,6 +41,9 @@ #undef __get_rel_bitmask #define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field) +#undef __get_rel_sockaddr +#define __get_rel_sockaddr(field) ((struct sockaddr *)__get_rel_dynamic_array(field)) + #undef __perf_count #define 
__perf_count(c) (__count = (c)) diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index 3d29919045af2..7c86cc541c7a6 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -108,6 +108,9 @@ TRACE_MAKE_SYSTEM_STR(); #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(char, item, -1) +#undef __sockaddr +#define __sockaddr(field, len) __dynamic_array(u8, field, len) + #undef __rel_dynamic_array #define __rel_dynamic_array(type, item, len) u32 __rel_loc_##item; @@ -120,6 +123,9 @@ TRACE_MAKE_SYSTEM_STR(); #undef __rel_bitmask #define __rel_bitmask(item, nr_bits) __rel_dynamic_array(char, item, -1) +#undef __rel_sockaddr +#define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len) + #undef TP_STRUCT__entry #define TP_STRUCT__entry(args...) args @@ -212,11 +218,14 @@ TRACE_MAKE_SYSTEM_STR(); #undef __string #define __string(item, src) __dynamic_array(char, item, -1) +#undef __string_len +#define __string_len(item, src, len) __dynamic_array(char, item, -1) + #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) -#undef __string_len -#define __string_len(item, src, len) __dynamic_array(char, item, -1) +#undef __sockaddr +#define __sockaddr(field, len) __dynamic_array(u8, field, len) #undef __rel_dynamic_array #define __rel_dynamic_array(type, item, len) u32 item; @@ -230,6 +239,9 @@ TRACE_MAKE_SYSTEM_STR(); #undef __rel_bitmask #define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) +#undef __rel_sockaddr +#define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len) + #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ struct trace_event_data_offsets_##call { \ @@ -349,6 +361,12 @@ TRACE_MAKE_SYSTEM_STR(); trace_print_bitmask_seq(p, __bitmask, __bitmask_size); \ }) +#undef __get_sockaddr +#define __get_sockaddr(field) ((struct sockaddr *)__get_dynamic_array(field)) + +#undef 
__get_rel_sockaddr +#define __get_rel_sockaddr(field) ((struct sockaddr *)__get_rel_dynamic_array(field)) + #undef __print_flags #define __print_flags(flag, delim, flag_array...) \ ({ \ @@ -518,6 +536,9 @@ static struct trace_event_functions trace_event_type_funcs_##call = { \ #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) +#undef __sockaddr +#define __sockaddr(field, len) __dynamic_array(u8, field, len) + #undef __rel_dynamic_array #define __rel_dynamic_array(_type, _item, _len) { \ .type = "__rel_loc " #_type "[]", .name = #_item, \ @@ -533,6 +554,9 @@ static struct trace_event_functions trace_event_type_funcs_##call = { \ #undef __rel_bitmask #define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) +#undef __rel_sockaddr +#define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len) + #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \ static struct trace_event_fields trace_event_fields_##call[] = { \ @@ -624,6 +648,12 @@ static struct trace_event_fields trace_event_fields_##call[] = { \ #define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, \ __bitmask_size_in_longs(nr_bits)) +#undef __sockaddr +#define __sockaddr(field, len) __dynamic_array(u8, field, len) + +#undef __rel_sockaddr +#define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len) + #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ static inline notrace int trace_event_get_offsets_##call( \ @@ -788,6 +818,15 @@ static inline notrace int trace_event_get_offsets_##call( \ #define __assign_bitmask(dst, src, nr_bits) \ memcpy(__get_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits)) +#undef __sockaddr +#define __sockaddr(field, len) __dynamic_array(u8, field, len) + +#undef __get_sockaddr +#define __get_sockaddr(field) ((struct sockaddr *)__get_dynamic_array(field)) + +#define 
__assign_sockaddr(dest, src, len) \ + memcpy(__get_dynamic_array(dest), src, len) + #undef __rel_dynamic_array #define __rel_dynamic_array(type, item, len) \ __entry->__rel_loc_##item = __data_offsets.item; @@ -819,6 +858,16 @@ static inline notrace int trace_event_get_offsets_##call( \ #define __assign_rel_bitmask(dst, src, nr_bits) \ memcpy(__get_rel_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits)) +#undef __rel_sockaddr +#define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len) + +#undef __get_rel_sockaddr +#define __get_rel_sockaddr(field) ((struct sockaddr *)__get_rel_dynamic_array(field)) + +#define __assign_rel_sockaddr(dest, src, len) \ + memcpy(__get_rel_dynamic_array(dest), src, len) + + #undef TP_fast_assign #define TP_fast_assign(args...) args @@ -883,10 +932,12 @@ static inline void ftrace_test_probe_##call(void) \ #undef __get_dynamic_array_len #undef __get_str #undef __get_bitmask +#undef __get_sockaddr #undef __get_rel_dynamic_array #undef __get_rel_dynamic_array_len #undef __get_rel_str #undef __get_rel_bitmask +#undef __get_rel_sockaddr #undef __print_array #undef __print_hex_dump -- GitLab From c6ced22997ad56a05377221bded7bb30973a62f2 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 25 Jan 2022 10:44:08 -0500 Subject: [PATCH 0841/1586] tracing: Update print fmt check to handle new __get_sockaddr() macro A helper macro was added to make reading socket addresses easier in trace events. It pairs %pISpc with __get_sockaddr() that reads the socket address from the ring buffer into a human readable format. The boot up check that makes sure that trace events do not reference pointers to memory that can later be freed when the trace event is read, incorrectly flagged this as a delayed reference. Update the check to handle "__get_sockaddr" and not report an error on it. 
Link: https://lore.kernel.org/all/20220125160505.068dbb52@canb.auug.org.au/ Reported-by: Stephen Rothwell Signed-off-by: Steven Rostedt (Google) Signed-off-by: Chuck Lever --- kernel/trace/trace_events.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 3147614c1812a..f527ae807e77d 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -384,6 +384,12 @@ static void test_event_printk(struct trace_event_call *call) if (!(dereference_flags & (1ULL << arg))) goto next_arg; + /* Check for __get_sockaddr */; + if (str_has_prefix(fmt + i, "__get_sockaddr(")) { + dereference_flags &= ~(1ULL << arg); + goto next_arg; + } + /* Find the REC-> in the argument */ c = strchr(fmt + i, ','); r = strstr(fmt + i, "REC->"); -- GitLab From 9db0e15fb32b7418608d9d45011837a049ec4a28 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 11 Jan 2022 12:44:55 -0500 Subject: [PATCH 0842/1586] NFSD: Use __sockaddr field to store socket addresses As an example usage of the new __sockaddr field, convert some NFSD trace points to use it. 
Signed-off-by: Chuck Lever --- fs/nfsd/trace.h | 79 ++++++++++++++++++++++++------------------------- 1 file changed, 39 insertions(+), 40 deletions(-) diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 5889db66409df..f0d2ccb633a68 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -613,20 +613,21 @@ TRACE_EVENT(nfsd_clid_cred_mismatch, __field(u32, cl_id) __field(unsigned long, cl_flavor) __field(unsigned long, new_flavor) - __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + __sockaddr(addr, rqstp->rq_xprt->xpt_remotelen) ), TP_fast_assign( __entry->cl_boot = clp->cl_clientid.cl_boot; __entry->cl_id = clp->cl_clientid.cl_id; __entry->cl_flavor = clp->cl_cred.cr_flavor; __entry->new_flavor = rqstp->rq_cred.cr_flavor; - memcpy(__entry->addr, &rqstp->rq_xprt->xpt_remote, - sizeof(struct sockaddr_in6)); + __assign_sockaddr(addr, &rqstp->rq_xprt->xpt_remote, + rqstp->rq_xprt->xpt_remotelen); ), TP_printk("client %08x:%08x flavor=%s, conflict=%s from addr=%pISpc", __entry->cl_boot, __entry->cl_id, show_nfsd_authflavor(__entry->cl_flavor), - show_nfsd_authflavor(__entry->new_flavor), __entry->addr + show_nfsd_authflavor(__entry->new_flavor), + __get_sockaddr(addr) ) ) @@ -642,7 +643,7 @@ TRACE_EVENT(nfsd_clid_verf_mismatch, __field(u32, cl_id) __array(unsigned char, cl_verifier, NFS4_VERIFIER_SIZE) __array(unsigned char, new_verifier, NFS4_VERIFIER_SIZE) - __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + __sockaddr(addr, rqstp->rq_xprt->xpt_remotelen) ), TP_fast_assign( __entry->cl_boot = clp->cl_clientid.cl_boot; @@ -651,14 +652,14 @@ TRACE_EVENT(nfsd_clid_verf_mismatch, NFS4_VERIFIER_SIZE); memcpy(__entry->new_verifier, (void *)verf, NFS4_VERIFIER_SIZE); - memcpy(__entry->addr, &rqstp->rq_xprt->xpt_remote, - sizeof(struct sockaddr_in6)); + __assign_sockaddr(addr, &rqstp->rq_xprt->xpt_remote, + rqstp->rq_xprt->xpt_remotelen); ), TP_printk("client %08x:%08x verf=0x%s, updated=0x%s from addr=%pISpc", __entry->cl_boot, __entry->cl_id, 
__print_hex_str(__entry->cl_verifier, NFS4_VERIFIER_SIZE), __print_hex_str(__entry->new_verifier, NFS4_VERIFIER_SIZE), - __entry->addr + __get_sockaddr(addr) ) ); @@ -908,18 +909,17 @@ TRACE_EVENT(nfsd_cb_args, __field(u32, cl_id) __field(u32, prog) __field(u32, ident) - __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + __sockaddr(addr, conn->cb_addrlen) ), TP_fast_assign( __entry->cl_boot = clp->cl_clientid.cl_boot; __entry->cl_id = clp->cl_clientid.cl_id; __entry->prog = conn->cb_prog; __entry->ident = conn->cb_ident; - memcpy(__entry->addr, &conn->cb_addr, - sizeof(struct sockaddr_in6)); + __assign_sockaddr(addr, &conn->cb_addr, conn->cb_addrlen); ), TP_printk("addr=%pISpc client %08x:%08x prog=%u ident=%u", - __entry->addr, __entry->cl_boot, __entry->cl_id, + __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id, __entry->prog, __entry->ident) ); @@ -951,17 +951,17 @@ DECLARE_EVENT_CLASS(nfsd_cb_class, __field(unsigned long, state) __field(u32, cl_boot) __field(u32, cl_id) - __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + __sockaddr(addr, clp->cl_cb_conn.cb_addrlen) ), TP_fast_assign( __entry->state = clp->cl_cb_state; __entry->cl_boot = clp->cl_clientid.cl_boot; __entry->cl_id = clp->cl_clientid.cl_id; - memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr, - sizeof(struct sockaddr_in6)); + __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr, + clp->cl_cb_conn.cb_addrlen) ), TP_printk("addr=%pISpc client %08x:%08x state=%s", - __entry->addr, __entry->cl_boot, __entry->cl_id, + __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id, show_cb_state(__entry->state)) ); @@ -1001,7 +1001,7 @@ TRACE_EVENT(nfsd_cb_setup, __field(u32, cl_boot) __field(u32, cl_id) __field(unsigned long, authflavor) - __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + __sockaddr(addr, clp->cl_cb_conn.cb_addrlen) __array(unsigned char, netid, 8) ), TP_fast_assign( @@ -1009,11 +1009,11 @@ TRACE_EVENT(nfsd_cb_setup, __entry->cl_id = clp->cl_clientid.cl_id; 
strlcpy(__entry->netid, netid, sizeof(__entry->netid)); __entry->authflavor = authflavor; - memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr, - sizeof(struct sockaddr_in6)); + __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr, + clp->cl_cb_conn.cb_addrlen) ), TP_printk("addr=%pISpc client %08x:%08x proto=%s flavor=%s", - __entry->addr, __entry->cl_boot, __entry->cl_id, + __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id, __entry->netid, show_nfsd_authflavor(__entry->authflavor)) ); @@ -1027,30 +1027,32 @@ TRACE_EVENT(nfsd_cb_setup_err, __field(long, error) __field(u32, cl_boot) __field(u32, cl_id) - __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + __sockaddr(addr, clp->cl_cb_conn.cb_addrlen) ), TP_fast_assign( __entry->error = error; __entry->cl_boot = clp->cl_clientid.cl_boot; __entry->cl_id = clp->cl_clientid.cl_id; - memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr, - sizeof(struct sockaddr_in6)); + __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr, + clp->cl_cb_conn.cb_addrlen) ), TP_printk("addr=%pISpc client %08x:%08x error=%ld", - __entry->addr, __entry->cl_boot, __entry->cl_id, __entry->error) + __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id, + __entry->error) ); -TRACE_EVENT(nfsd_cb_recall, +TRACE_EVENT_CONDITION(nfsd_cb_recall, TP_PROTO( const struct nfs4_stid *stid ), TP_ARGS(stid), + TP_CONDITION(stid->sc_client), TP_STRUCT__entry( __field(u32, cl_boot) __field(u32, cl_id) __field(u32, si_id) __field(u32, si_generation) - __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + __sockaddr(addr, stid->sc_client->cl_cb_conn.cb_addrlen) ), TP_fast_assign( const stateid_t *stp = &stid->sc_stateid; @@ -1060,14 +1062,11 @@ TRACE_EVENT(nfsd_cb_recall, __entry->cl_id = stp->si_opaque.so_clid.cl_id; __entry->si_id = stp->si_opaque.so_id; __entry->si_generation = stp->si_generation; - if (clp) - memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr, - sizeof(struct sockaddr_in6)); - else - memset(__entry->addr, 0, sizeof(struct sockaddr_in6)); + 
__assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr, + clp->cl_cb_conn.cb_addrlen) ), TP_printk("addr=%pISpc client %08x:%08x stateid %08x:%08x", - __entry->addr, __entry->cl_boot, __entry->cl_id, + __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id, __entry->si_id, __entry->si_generation) ); @@ -1081,7 +1080,7 @@ TRACE_EVENT(nfsd_cb_notify_lock, __field(u32, cl_boot) __field(u32, cl_id) __field(u32, fh_hash) - __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + __sockaddr(addr, lo->lo_owner.so_client->cl_cb_conn.cb_addrlen) ), TP_fast_assign( const struct nfs4_client *clp = lo->lo_owner.so_client; @@ -1089,11 +1088,11 @@ TRACE_EVENT(nfsd_cb_notify_lock, __entry->cl_boot = clp->cl_clientid.cl_boot; __entry->cl_id = clp->cl_clientid.cl_id; __entry->fh_hash = knfsd_fh_hash(&nbl->nbl_fh); - memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr, - sizeof(struct sockaddr_in6)); + __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr, + clp->cl_cb_conn.cb_addrlen) ), TP_printk("addr=%pISpc client %08x:%08x fh_hash=0x%08x", - __entry->addr, __entry->cl_boot, __entry->cl_id, + __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id, __entry->fh_hash) ); @@ -1114,7 +1113,7 @@ TRACE_EVENT(nfsd_cb_offload, __field(u32, fh_hash) __field(int, status) __field(u64, count) - __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + __sockaddr(addr, clp->cl_cb_conn.cb_addrlen) ), TP_fast_assign( __entry->cl_boot = stp->si_opaque.so_clid.cl_boot; @@ -1124,11 +1123,11 @@ TRACE_EVENT(nfsd_cb_offload, __entry->fh_hash = knfsd_fh_hash(fh); __entry->status = be32_to_cpu(status); __entry->count = count; - memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr, - sizeof(struct sockaddr_in6)); + __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr, + clp->cl_cb_conn.cb_addrlen) ), TP_printk("addr=%pISpc client %08x:%08x stateid %08x:%08x fh_hash=0x%08x count=%llu status=%d", - __entry->addr, __entry->cl_boot, __entry->cl_id, + __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id, __entry->si_id, 
__entry->si_generation, __entry->fh_hash, __entry->count, __entry->status) ); -- GitLab From c1a3f2ce66c80cd9f2a4376fa35a5c8d05441c73 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 20 Oct 2021 14:53:30 -0400 Subject: [PATCH 0843/1586] NFSD: Remove NFSD_PROC_ARGS_* macros Clean up. The PROC_ARGS macros were added when I thought that NFSD tracepoints would be reporting endpoint information. However, tracepoints in the RPC server now report transport endpoint information, so in general there's no need for the upper layers to do that any more, and these macros can be retired. Signed-off-by: Chuck Lever --- fs/nfsd/trace.h | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index f0d2ccb633a68..242fa123e0e94 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -13,22 +13,6 @@ #include "export.h" #include "nfsfh.h" -#define NFSD_TRACE_PROC_ARG_FIELDS \ - __field(unsigned int, netns_ino) \ - __field(u32, xid) \ - __array(unsigned char, server, sizeof(struct sockaddr_in6)) \ - __array(unsigned char, client, sizeof(struct sockaddr_in6)) - -#define NFSD_TRACE_PROC_ARG_ASSIGNMENTS \ - do { \ - __entry->netns_ino = SVC_NET(rqstp)->ns.inum; \ - __entry->xid = be32_to_cpu(rqstp->rq_xid); \ - memcpy(__entry->server, &rqstp->rq_xprt->xpt_local, \ - rqstp->rq_xprt->xpt_locallen); \ - memcpy(__entry->client, &rqstp->rq_xprt->xpt_remote, \ - rqstp->rq_xprt->xpt_remotelen); \ - } while (0); - #define NFSD_TRACE_PROC_RES_FIELDS \ __field(unsigned int, netns_ino) \ __field(u32, xid) \ @@ -53,16 +37,22 @@ DECLARE_EVENT_CLASS(nfsd_xdr_err_class, ), TP_ARGS(rqstp), TP_STRUCT__entry( - NFSD_TRACE_PROC_ARG_FIELDS - + __field(unsigned int, netns_ino) + __field(u32, xid) __field(u32, vers) __field(u32, proc) + __sockaddr(server, rqstp->rq_xprt->xpt_locallen) + __sockaddr(client, rqstp->rq_xprt->xpt_remotelen) ), TP_fast_assign( - NFSD_TRACE_PROC_ARG_ASSIGNMENTS + const struct svc_xprt *xprt = rqstp->rq_xprt; 
+ __entry->netns_ino = xprt->xpt_net->ns.inum; + __entry->xid = be32_to_cpu(rqstp->rq_xid); __entry->vers = rqstp->rq_vers; __entry->proc = rqstp->rq_proc; + __assign_sockaddr(server, &xprt->xpt_local, xprt->xpt_locallen); + __assign_sockaddr(client, &xprt->xpt_remote, xprt->xpt_remotelen); ), TP_printk("xid=0x%08x vers=%u proc=%u", __entry->xid, __entry->vers, __entry->proc -- GitLab From 26ce14e77a827fd73a650cffea4db7ddcc62ebc4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 24 Jan 2022 11:31:45 -0500 Subject: [PATCH 0844/1586] SUNRPC: Improve sockaddr handling in the svc_xprt_create_error trace point Clean up: Use the new __sockaddr field to record the socket address. Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 29982d60b68ab..39ed91dca5aba 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1774,18 +1774,18 @@ TRACE_EVENT(svc_xprt_create_err, __field(long, error) __string(program, program) __string(protocol, protocol) - __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + __sockaddr(addr, salen) ), TP_fast_assign( __entry->error = PTR_ERR(xprt); __assign_str(program, program); __assign_str(protocol, protocol); - memcpy(__entry->addr, sap, min(salen, sizeof(__entry->addr))); + __assign_sockaddr(addr, sap, salen); ), TP_printk("addr=%pISpc program=%s protocol=%s error=%ld", - __entry->addr, __get_str(program), __get_str(protocol), + __get_sockaddr(addr), __get_str(program), __get_str(protocol), __entry->error) ); -- GitLab From aca3ed791553f1f9f994273a12b30c35b40f2769 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 19 Oct 2021 17:07:44 -0400 Subject: [PATCH 0845/1586] SUNRPC: Same as SVC_RQST_ENDPOINT, but without the xid Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 112 +++++++++++++++++++++------------- 1 file changed, 71 insertions(+), 41 
deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 39ed91dca5aba..84f5853196953 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1789,50 +1789,99 @@ TRACE_EVENT(svc_xprt_create_err, __entry->error) ); +#define SVC_XPRT_ENDPOINT_FIELDS(x) \ + __sockaddr(server, (x)->xpt_locallen) \ + __sockaddr(client, (x)->xpt_remotelen) \ + __field(unsigned long, flags) \ + __field(unsigned int, netns_ino) + +#define SVC_XPRT_ENDPOINT_ASSIGNMENTS(x) \ + do { \ + __assign_sockaddr(server, &(x)->xpt_local, \ + (x)->xpt_locallen); \ + __assign_sockaddr(client, &(x)->xpt_remote, \ + (x)->xpt_remotelen); \ + __entry->flags = (x)->xpt_flags; \ + __entry->netns_ino = (x)->xpt_net->ns.inum; \ + } while (0) + +#define SVC_XPRT_ENDPOINT_FORMAT \ + "server=%pISpc client=%pISpc flags=%s" + +#define SVC_XPRT_ENDPOINT_VARARGS \ + __get_sockaddr(server), __get_sockaddr(client), \ + show_svc_xprt_flags(__entry->flags) + TRACE_EVENT(svc_xprt_enqueue, - TP_PROTO(struct svc_xprt *xprt, struct svc_rqst *rqst), + TP_PROTO( + const struct svc_xprt *xprt, + const struct svc_rqst *rqst + ), TP_ARGS(xprt, rqst), TP_STRUCT__entry( + SVC_XPRT_ENDPOINT_FIELDS(xprt) + __field(int, pid) - __field(unsigned long, flags) - __string(addr, xprt->xpt_remotebuf) ), TP_fast_assign( + SVC_XPRT_ENDPOINT_ASSIGNMENTS(xprt); + __entry->pid = rqst? 
rqst->rq_task->pid : 0; - __entry->flags = xprt->xpt_flags; - __assign_str(addr, xprt->xpt_remotebuf); ), - TP_printk("addr=%s pid=%d flags=%s", __get_str(addr), - __entry->pid, show_svc_xprt_flags(__entry->flags)) + TP_printk(SVC_XPRT_ENDPOINT_FORMAT " pid=%d", + SVC_XPRT_ENDPOINT_VARARGS, __entry->pid) +); + +TRACE_EVENT(svc_xprt_dequeue, + TP_PROTO( + const struct svc_rqst *rqst + ), + + TP_ARGS(rqst), + + TP_STRUCT__entry( + SVC_XPRT_ENDPOINT_FIELDS(rqst->rq_xprt) + + __field(unsigned long, wakeup) + ), + + TP_fast_assign( + SVC_XPRT_ENDPOINT_ASSIGNMENTS(rqst->rq_xprt); + + __entry->wakeup = ktime_to_us(ktime_sub(ktime_get(), + rqst->rq_qtime)); + ), + + TP_printk(SVC_XPRT_ENDPOINT_FORMAT " wakeup-us=%lu", + SVC_XPRT_ENDPOINT_VARARGS, __entry->wakeup) ); DECLARE_EVENT_CLASS(svc_xprt_event, - TP_PROTO(struct svc_xprt *xprt), + TP_PROTO( + const struct svc_xprt *xprt + ), TP_ARGS(xprt), TP_STRUCT__entry( - __field(unsigned long, flags) - __string(addr, xprt->xpt_remotebuf) + SVC_XPRT_ENDPOINT_FIELDS(xprt) ), TP_fast_assign( - __entry->flags = xprt->xpt_flags; - __assign_str(addr, xprt->xpt_remotebuf); + SVC_XPRT_ENDPOINT_ASSIGNMENTS(xprt); ), - TP_printk("addr=%s flags=%s", __get_str(addr), - show_svc_xprt_flags(__entry->flags)) + TP_printk(SVC_XPRT_ENDPOINT_FORMAT, SVC_XPRT_ENDPOINT_VARARGS) ); #define DEFINE_SVC_XPRT_EVENT(name) \ DEFINE_EVENT(svc_xprt_event, svc_xprt_##name, \ TP_PROTO( \ - struct svc_xprt *xprt \ + const struct svc_xprt *xprt \ ), \ TP_ARGS(xprt)) @@ -1850,44 +1899,25 @@ TRACE_EVENT(svc_xprt_accept, TP_ARGS(xprt, service), TP_STRUCT__entry( - __string(addr, xprt->xpt_remotebuf) + SVC_XPRT_ENDPOINT_FIELDS(xprt) + __string(protocol, xprt->xpt_class->xcl_name) __string(service, service) ), TP_fast_assign( - __assign_str(addr, xprt->xpt_remotebuf); + SVC_XPRT_ENDPOINT_ASSIGNMENTS(xprt); + __assign_str(protocol, xprt->xpt_class->xcl_name); __assign_str(service, service); ), - TP_printk("addr=%s protocol=%s service=%s", - __get_str(addr), 
__get_str(protocol), __get_str(service) + TP_printk(SVC_XPRT_ENDPOINT_FORMAT " protocol=%s service=%s", + SVC_XPRT_ENDPOINT_VARARGS, + __get_str(protocol), __get_str(service) ) ); -TRACE_EVENT(svc_xprt_dequeue, - TP_PROTO(struct svc_rqst *rqst), - - TP_ARGS(rqst), - - TP_STRUCT__entry( - __field(unsigned long, flags) - __field(unsigned long, wakeup) - __string(addr, rqst->rq_xprt->xpt_remotebuf) - ), - - TP_fast_assign( - __entry->flags = rqst->rq_xprt->xpt_flags; - __entry->wakeup = ktime_to_us(ktime_sub(ktime_get(), - rqst->rq_qtime)); - __assign_str(addr, rqst->rq_xprt->xpt_remotebuf); - ), - - TP_printk("addr=%s flags=%s wakeup-us=%lu", __get_str(addr), - show_svc_xprt_flags(__entry->flags), __entry->wakeup) -); - TRACE_EVENT(svc_wake_up, TP_PROTO(int pid), -- GitLab From 70a60cbfb613d8f6ffd1d9ade187d0a868066500 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 19 Oct 2021 13:47:24 -0400 Subject: [PATCH 0846/1586] SUNRPC: Record endpoint information in trace log To make server-side trace events more useful in containerized environments, capture not just the remote's IP address, but the local IP address and network namespace as well.
Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 126 +++++++++++++++++++++------------- 1 file changed, 78 insertions(+), 48 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 84f5853196953..ab8ae1f6ba84d 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1625,26 +1625,53 @@ TRACE_DEFINE_ENUM(SVC_COMPLETE); { SVC_PENDING, "SVC_PENDING" }, \ { SVC_COMPLETE, "SVC_COMPLETE" }) +#define SVC_RQST_ENDPOINT_FIELDS(r) \ + __sockaddr(server, (r)->rq_xprt->xpt_locallen) \ + __sockaddr(client, (r)->rq_xprt->xpt_remotelen) \ + __field(unsigned int, netns_ino) \ + __field(u32, xid) + +#define SVC_RQST_ENDPOINT_ASSIGNMENTS(r) \ + do { \ + struct svc_xprt *xprt = (r)->rq_xprt; \ + __assign_sockaddr(server, &xprt->xpt_local, \ + xprt->xpt_locallen); \ + __assign_sockaddr(client, &xprt->xpt_remote, \ + xprt->xpt_remotelen); \ + __entry->netns_ino = xprt->xpt_net->ns.inum; \ + __entry->xid = be32_to_cpu((r)->rq_xid); \ + } while (0) + +#define SVC_RQST_ENDPOINT_FORMAT \ + "xid=0x%08x server=%pISpc client=%pISpc" + +#define SVC_RQST_ENDPOINT_VARARGS \ + __entry->xid, __get_sockaddr(server), __get_sockaddr(client) + TRACE_EVENT(svc_authenticate, TP_PROTO(const struct svc_rqst *rqst, int auth_res), TP_ARGS(rqst, auth_res), TP_STRUCT__entry( - __field(u32, xid) + SVC_RQST_ENDPOINT_FIELDS(rqst) + __field(unsigned long, svc_status) __field(unsigned long, auth_stat) ), TP_fast_assign( - __entry->xid = be32_to_cpu(rqst->rq_xid); + SVC_RQST_ENDPOINT_ASSIGNMENTS(rqst); + __entry->svc_status = auth_res; __entry->auth_stat = be32_to_cpu(rqst->rq_auth_stat); ), - TP_printk("xid=0x%08x auth_res=%s auth_stat=%s", - __entry->xid, svc_show_status(__entry->svc_status), - rpc_show_auth_stat(__entry->auth_stat)) + TP_printk(SVC_RQST_ENDPOINT_FORMAT + " auth_res=%s auth_stat=%s", + SVC_RQST_ENDPOINT_VARARGS, + svc_show_status(__entry->svc_status), + rpc_show_auth_stat(__entry->auth_stat)) ); TRACE_EVENT(svc_process, 
@@ -1680,7 +1707,6 @@ TRACE_EVENT(svc_process, ); DECLARE_EVENT_CLASS(svc_rqst_event, - TP_PROTO( const struct svc_rqst *rqst ), @@ -1688,20 +1714,20 @@ DECLARE_EVENT_CLASS(svc_rqst_event, TP_ARGS(rqst), TP_STRUCT__entry( - __field(u32, xid) + SVC_RQST_ENDPOINT_FIELDS(rqst) + __field(unsigned long, flags) - __string(addr, rqst->rq_xprt->xpt_remotebuf) ), TP_fast_assign( - __entry->xid = be32_to_cpu(rqst->rq_xid); + SVC_RQST_ENDPOINT_ASSIGNMENTS(rqst); + __entry->flags = rqst->rq_flags; - __assign_str(addr, rqst->rq_xprt->xpt_remotebuf); ), - TP_printk("addr=%s xid=0x%08x flags=%s", - __get_str(addr), __entry->xid, - show_rqstp_flags(__entry->flags)) + TP_printk(SVC_RQST_ENDPOINT_FORMAT " flags=%s", + SVC_RQST_ENDPOINT_VARARGS, + show_rqstp_flags(__entry->flags)) ); #define DEFINE_SVC_RQST_EVENT(name) \ DEFINE_EVENT(svc_rqst_event, svc_##name, \ @@ -1714,34 +1740,63 @@ DEFINE_SVC_RQST_EVENT(defer); DEFINE_SVC_RQST_EVENT(drop); DECLARE_EVENT_CLASS(svc_rqst_status, - - TP_PROTO(struct svc_rqst *rqst, int status), + TP_PROTO( + const struct svc_rqst *rqst, + int status + ), TP_ARGS(rqst, status), TP_STRUCT__entry( - __field(u32, xid) + SVC_RQST_ENDPOINT_FIELDS(rqst) + __field(int, status) __field(unsigned long, flags) - __string(addr, rqst->rq_xprt->xpt_remotebuf) ), TP_fast_assign( - __entry->xid = be32_to_cpu(rqst->rq_xid); + SVC_RQST_ENDPOINT_ASSIGNMENTS(rqst); + __entry->status = status; __entry->flags = rqst->rq_flags; - __assign_str(addr, rqst->rq_xprt->xpt_remotebuf); ), - TP_printk("addr=%s xid=0x%08x status=%d flags=%s", - __get_str(addr), __entry->xid, - __entry->status, show_rqstp_flags(__entry->flags)) + TP_printk(SVC_RQST_ENDPOINT_FORMAT " status=%d flags=%s", + SVC_RQST_ENDPOINT_VARARGS, + __entry->status, show_rqstp_flags(__entry->flags)) ); DEFINE_EVENT(svc_rqst_status, svc_send, - TP_PROTO(struct svc_rqst *rqst, int status), + TP_PROTO(const struct svc_rqst *rqst, int status), TP_ARGS(rqst, status)); +TRACE_EVENT(svc_stats_latency, + TP_PROTO( + const 
struct svc_rqst *rqst + ), + + TP_ARGS(rqst), + + TP_STRUCT__entry( + SVC_RQST_ENDPOINT_FIELDS(rqst) + + __field(unsigned long, execute) + __string(procedure, svc_proc_name(rqst)) + ), + + TP_fast_assign( + SVC_RQST_ENDPOINT_ASSIGNMENTS(rqst); + + __entry->execute = ktime_to_us(ktime_sub(ktime_get(), + rqst->rq_stime)); + __assign_str(procedure, svc_proc_name(rqst)); + ), + + TP_printk(SVC_RQST_ENDPOINT_FORMAT " proc=%s execute-us=%lu", + SVC_RQST_ENDPOINT_VARARGS, + __get_str(procedure), __entry->execute) +); + #define show_svc_xprt_flags(flags) \ __print_flags(flags, "|", \ { (1UL << XPT_BUSY), "XPT_BUSY"}, \ @@ -1952,31 +2007,6 @@ TRACE_EVENT(svc_alloc_arg_err, TP_printk("pages=%u", __entry->pages) ); -TRACE_EVENT(svc_stats_latency, - TP_PROTO(const struct svc_rqst *rqst), - - TP_ARGS(rqst), - - TP_STRUCT__entry( - __field(u32, xid) - __field(unsigned long, execute) - __string(procedure, svc_proc_name(rqst)) - __string(addr, rqst->rq_xprt->xpt_remotebuf) - ), - - TP_fast_assign( - __entry->xid = be32_to_cpu(rqst->rq_xid); - __entry->execute = ktime_to_us(ktime_sub(ktime_get(), - rqst->rq_stime)); - __assign_str(procedure, svc_proc_name(rqst)); - __assign_str(addr, rqst->rq_xprt->xpt_remotebuf); - ), - - TP_printk("addr=%s xid=0x%08x proc=%s execute-us=%lu", - __get_str(addr), __entry->xid, __get_str(procedure), - __entry->execute) -); - DECLARE_EVENT_CLASS(svc_deferred_event, TP_PROTO( const struct svc_deferred_req *dr -- GitLab From a9ff2e99e9fa501ec965da03c18a5422b37a2f44 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 25 Jan 2022 10:17:59 -0500 Subject: [PATCH 0847/1586] SUNRPC: Remove the .svo_enqueue_xprt method We have never been able to track down and address the underlying cause of the performance issues with workqueue-based service support. svo_enqueue_xprt is called multiple times per RPC, so it adds instruction path length, but always ends up at the same function: svc_xprt_do_enqueue(). 
We do not anticipate needing this flexibility for dynamic nfsd thread management support. As a micro-optimization, remove .svo_enqueue_xprt because Spectre/Meltdown makes virtual function calls more costly. This change essentially reverts commit b9e13cdfac70 ("nfsd/sunrpc: turn enqueueing a svc_xprt into a svc_serv operation"). Signed-off-by: Chuck Lever --- fs/lockd/svc.c | 1 - fs/nfs/callback.c | 2 -- fs/nfsd/nfssvc.c | 1 - include/linux/sunrpc/svc.h | 3 --- include/linux/sunrpc/svc_xprt.h | 1 - net/sunrpc/svc_xprt.c | 10 +++++----- 6 files changed, 5 insertions(+), 13 deletions(-) diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 0475c5a5d061e..3a05af8736259 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -353,7 +353,6 @@ static struct notifier_block lockd_inet6addr_notifier = { static const struct svc_serv_ops lockd_sv_ops = { .svo_shutdown = svc_rpcb_cleanup, .svo_function = lockd, - .svo_enqueue_xprt = svc_xprt_do_enqueue, .svo_module = THIS_MODULE, }; diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 054cc1255fac6..7a810f8850632 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -234,13 +234,11 @@ err_bind: static const struct svc_serv_ops nfs40_cb_sv_ops = { .svo_function = nfs4_callback_svc, - .svo_enqueue_xprt = svc_xprt_do_enqueue, .svo_module = THIS_MODULE, }; #if defined(CONFIG_NFS_V4_1) static const struct svc_serv_ops nfs41_cb_sv_ops = { .svo_function = nfs41_callback_svc, - .svo_enqueue_xprt = svc_xprt_do_enqueue, .svo_module = THIS_MODULE, }; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index b8c682b62d299..aeeac6de1f0ac 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -615,7 +615,6 @@ static int nfsd_get_default_max_blksize(void) static const struct svc_serv_ops nfsd_thread_sv_ops = { .svo_shutdown = nfsd_last_thread, .svo_function = nfsd, - .svo_enqueue_xprt = svc_xprt_do_enqueue, .svo_module = THIS_MODULE, }; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index f35c22b3355ff..6ef9c1cafd0b2 
100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -61,9 +61,6 @@ struct svc_serv_ops { /* function for service threads to run */ int (*svo_function)(void *); - /* queue up a transport for servicing */ - void (*svo_enqueue_xprt)(struct svc_xprt *); - /* optional module to count when adding threads. * Thread function must call module_put_and_kthread_exit() to exit. */ diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 571f605bc91ef..a3ba027fb4ba7 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -131,7 +131,6 @@ int svc_create_xprt(struct svc_serv *, const char *, struct net *, const int, const unsigned short, int, const struct cred *); void svc_xprt_received(struct svc_xprt *xprt); -void svc_xprt_do_enqueue(struct svc_xprt *xprt); void svc_xprt_enqueue(struct svc_xprt *xprt); void svc_xprt_put(struct svc_xprt *xprt); void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index b21ad79941474..9fce4f7774bb9 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -32,6 +32,7 @@ static int svc_deferred_recv(struct svc_rqst *rqstp); static struct cache_deferred_req *svc_defer(struct cache_req *req); static void svc_age_temp_xprts(struct timer_list *t); static void svc_delete_xprt(struct svc_xprt *xprt); +static void svc_xprt_do_enqueue(struct svc_xprt *xprt); /* apparently the "standard" is that clients close * idle connections after 5 minutes, servers after @@ -266,12 +267,12 @@ void svc_xprt_received(struct svc_xprt *xprt) } /* As soon as we clear busy, the xprt could be closed and - * 'put', so we need a reference to call svc_enqueue_xprt with: + * 'put', so we need a reference to call svc_xprt_do_enqueue with: */ svc_xprt_get(xprt); smp_mb__before_atomic(); clear_bit(XPT_BUSY, &xprt->xpt_flags); - xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt); + svc_xprt_do_enqueue(xprt); 
svc_xprt_put(xprt); } EXPORT_SYMBOL_GPL(svc_xprt_received); @@ -423,7 +424,7 @@ static bool svc_xprt_ready(struct svc_xprt *xprt) return false; } -void svc_xprt_do_enqueue(struct svc_xprt *xprt) +static void svc_xprt_do_enqueue(struct svc_xprt *xprt) { struct svc_pool *pool; struct svc_rqst *rqstp = NULL; @@ -467,7 +468,6 @@ out_unlock: put_cpu(); trace_svc_xprt_enqueue(xprt, rqstp); } -EXPORT_SYMBOL_GPL(svc_xprt_do_enqueue); /* * Queue up a transport with data pending. If there are idle nfsd @@ -478,7 +478,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) { if (test_bit(XPT_BUSY, &xprt->xpt_flags)) return; - xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt); + svc_xprt_do_enqueue(xprt); } EXPORT_SYMBOL_GPL(svc_xprt_enqueue); -- GitLab From c0219c499799c1e92bd570c15a47e6257a27bb15 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 25 Jan 2022 17:57:23 -0500 Subject: [PATCH 0848/1586] SUNRPC: Merge svc_do_enqueue_xprt() into svc_enqueue_xprt() Neil says: "These functions were separated in commit 0971374e2818 ("SUNRPC: Reduce contention in svc_xprt_enqueue()") so that the XPT_BUSY check happened before taking any spinlocks. We have since moved or removed the spinlocks so the extra test is fairly pointless." I've made this a separate patch in case the XPT_BUSY change has unexpected consequences and needs to be reverted. 
Suggested-by: Neil Brown Signed-off-by: Chuck Lever --- net/sunrpc/svc_xprt.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 9fce4f7774bb9..1c2295209d083 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -32,7 +32,6 @@ static int svc_deferred_recv(struct svc_rqst *rqstp); static struct cache_deferred_req *svc_defer(struct cache_req *req); static void svc_age_temp_xprts(struct timer_list *t); static void svc_delete_xprt(struct svc_xprt *xprt); -static void svc_xprt_do_enqueue(struct svc_xprt *xprt); /* apparently the "standard" is that clients close * idle connections after 5 minutes, servers after @@ -267,12 +266,12 @@ void svc_xprt_received(struct svc_xprt *xprt) } /* As soon as we clear busy, the xprt could be closed and - * 'put', so we need a reference to call svc_xprt_do_enqueue with: + * 'put', so we need a reference to call svc_xprt_enqueue with: */ svc_xprt_get(xprt); smp_mb__before_atomic(); clear_bit(XPT_BUSY, &xprt->xpt_flags); - svc_xprt_do_enqueue(xprt); + svc_xprt_enqueue(xprt); svc_xprt_put(xprt); } EXPORT_SYMBOL_GPL(svc_xprt_received); @@ -412,6 +411,8 @@ static bool svc_xprt_ready(struct svc_xprt *xprt) smp_rmb(); xpt_flags = READ_ONCE(xprt->xpt_flags); + if (xpt_flags & BIT(XPT_BUSY)) + return false; if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE))) return true; if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) { @@ -424,7 +425,12 @@ static bool svc_xprt_ready(struct svc_xprt *xprt) return false; } -static void svc_xprt_do_enqueue(struct svc_xprt *xprt) +/** + * svc_xprt_enqueue - Queue a transport on an idle nfsd thread + * @xprt: transport with data pending + * + */ +void svc_xprt_enqueue(struct svc_xprt *xprt) { struct svc_pool *pool; struct svc_rqst *rqstp = NULL; @@ -468,18 +474,6 @@ out_unlock: put_cpu(); trace_svc_xprt_enqueue(xprt, rqstp); } - -/* - * Queue up a transport with data pending. 
If there are idle nfsd - * processes, wake 'em up. - * - */ -void svc_xprt_enqueue(struct svc_xprt *xprt) -{ - if (test_bit(XPT_BUSY, &xprt->xpt_flags)) - return; - svc_xprt_do_enqueue(xprt); -} EXPORT_SYMBOL_GPL(svc_xprt_enqueue); /* -- GitLab From 87cdd8641c8a1ec6afd2468265e20840a57fd888 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 25 Jan 2022 13:49:29 -0500 Subject: [PATCH 0849/1586] SUNRPC: Remove svo_shutdown method Clean up. Neil observed that "any code that calls svc_shutdown_net() knows what the shutdown function should be, and so can call it directly." Signed-off-by: Chuck Lever Reviewed-by: NeilBrown --- fs/lockd/svc.c | 5 ++--- fs/nfsd/nfssvc.c | 2 +- include/linux/sunrpc/svc.h | 3 --- net/sunrpc/svc.c | 3 --- 4 files changed, 3 insertions(+), 10 deletions(-) diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 3a05af8736259..f5b688a844aa5 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -249,6 +249,7 @@ out_err: printk(KERN_WARNING "lockd_up: makesock failed, error=%d\n", err); svc_shutdown_net(serv, net); + svc_rpcb_cleanup(serv, net); return err; } @@ -287,8 +288,7 @@ static void lockd_down_net(struct svc_serv *serv, struct net *net) cancel_delayed_work_sync(&ln->grace_period_end); locks_end_grace(&ln->lockd_manager); svc_shutdown_net(serv, net); - dprintk("%s: per-net data destroyed; net=%x\n", - __func__, net->ns.inum); + svc_rpcb_cleanup(serv, net); } } else { pr_err("%s: no users! 
net=%x\n", @@ -351,7 +351,6 @@ static struct notifier_block lockd_inet6addr_notifier = { #endif static const struct svc_serv_ops lockd_sv_ops = { - .svo_shutdown = svc_rpcb_cleanup, .svo_function = lockd, .svo_module = THIS_MODULE, }; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index aeeac6de1f0ac..0c6b216e439ec 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -613,7 +613,6 @@ static int nfsd_get_default_max_blksize(void) } static const struct svc_serv_ops nfsd_thread_sv_ops = { - .svo_shutdown = nfsd_last_thread, .svo_function = nfsd, .svo_module = THIS_MODULE, }; @@ -724,6 +723,7 @@ void nfsd_put(struct net *net) if (kref_put(&nn->nfsd_serv->sv_refcnt, nfsd_noop)) { svc_shutdown_net(nn->nfsd_serv, net); + nfsd_last_thread(nn->nfsd_serv, net); svc_destroy(&nn->nfsd_serv->sv_refcnt); spin_lock(&nfsd_notifier_lock); nn->nfsd_serv = NULL; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 6ef9c1cafd0b2..63794d772eb33 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -55,9 +55,6 @@ struct svc_pool { struct svc_serv; struct svc_serv_ops { - /* Callback to use when last thread exits. */ - void (*svo_shutdown)(struct svc_serv *, struct net *); - /* function for service threads to run */ int (*svo_function)(void *); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 2aabec2b4becc..74a75a22da9a5 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -539,9 +539,6 @@ EXPORT_SYMBOL_GPL(svc_create_pooled); void svc_shutdown_net(struct svc_serv *serv, struct net *net) { svc_close_net(serv, net); - - if (serv->sv_ops->svo_shutdown) - serv->sv_ops->svo_shutdown(serv, net); } EXPORT_SYMBOL_GPL(svc_shutdown_net); -- GitLab From 352ad31448fecc78a2e9b78da64eea5d63b8d0ce Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 26 Jan 2022 11:42:08 -0500 Subject: [PATCH 0850/1586] SUNRPC: Rename svc_create_xprt() Clean up: Use the "svc_xprt_" function naming convention as is used for other external APIs. 
Signed-off-by: Chuck Lever --- fs/lockd/svc.c | 4 ++-- fs/nfs/callback.c | 12 ++++++------ fs/nfsd/nfsctl.c | 8 ++++---- fs/nfsd/nfssvc.c | 8 ++++---- include/linux/sunrpc/svc_xprt.h | 7 ++++--- net/sunrpc/svc_xprt.c | 24 +++++++++++++++++++----- 6 files changed, 39 insertions(+), 24 deletions(-) diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index f5b688a844aa5..bba6f2b45b64a 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -197,8 +197,8 @@ static int create_lockd_listener(struct svc_serv *serv, const char *name, xprt = svc_find_xprt(serv, name, net, family, 0); if (xprt == NULL) - return svc_create_xprt(serv, name, net, family, port, - SVC_SOCK_DEFAULTS, cred); + return svc_xprt_create(serv, name, net, family, port, + SVC_SOCK_DEFAULTS, cred); svc_xprt_put(xprt); return 0; } diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 7a810f8850632..c1a8767100ae9 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -45,18 +45,18 @@ static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net) int ret; struct nfs_net *nn = net_generic(net, nfs_net_id); - ret = svc_create_xprt(serv, "tcp", net, PF_INET, - nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS, - cred); + ret = svc_xprt_create(serv, "tcp", net, PF_INET, + nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS, + cred); if (ret <= 0) goto out_err; nn->nfs_callback_tcpport = ret; dprintk("NFS: Callback listener port = %u (af %u, net %x)\n", nn->nfs_callback_tcpport, PF_INET, net->ns.inum); - ret = svc_create_xprt(serv, "tcp", net, PF_INET6, - nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS, - cred); + ret = svc_xprt_create(serv, "tcp", net, PF_INET6, + nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS, + cred); if (ret > 0) { nn->nfs_callback_tcpport6 = ret; dprintk("NFS: Callback listener port = %u (af %u, net %x)\n", diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 68b020f2002b7..8fec779994f7b 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -772,13 +772,13 @@ static ssize_t __write_ports_addxprt(char 
*buf, struct net *net, const struct cr if (err != 0) return err; - err = svc_create_xprt(nn->nfsd_serv, transport, net, - PF_INET, port, SVC_SOCK_ANONYMOUS, cred); + err = svc_xprt_create(nn->nfsd_serv, transport, net, + PF_INET, port, SVC_SOCK_ANONYMOUS, cred); if (err < 0) goto out_err; - err = svc_create_xprt(nn->nfsd_serv, transport, net, - PF_INET6, port, SVC_SOCK_ANONYMOUS, cred); + err = svc_xprt_create(nn->nfsd_serv, transport, net, + PF_INET6, port, SVC_SOCK_ANONYMOUS, cred); if (err < 0 && err != -EAFNOSUPPORT) goto out_close; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 0c6b216e439ec..ae25b7b3af99e 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -293,13 +293,13 @@ static int nfsd_init_socks(struct net *net, const struct cred *cred) if (!list_empty(&nn->nfsd_serv->sv_permsocks)) return 0; - error = svc_create_xprt(nn->nfsd_serv, "udp", net, PF_INET, NFS_PORT, - SVC_SOCK_DEFAULTS, cred); + error = svc_xprt_create(nn->nfsd_serv, "udp", net, PF_INET, NFS_PORT, + SVC_SOCK_DEFAULTS, cred); if (error < 0) return error; - error = svc_create_xprt(nn->nfsd_serv, "tcp", net, PF_INET, NFS_PORT, - SVC_SOCK_DEFAULTS, cred); + error = svc_xprt_create(nn->nfsd_serv, "tcp", net, PF_INET, NFS_PORT, + SVC_SOCK_DEFAULTS, cred); if (error < 0) return error; diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index a3ba027fb4ba7..a7f6f17c3dc5e 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -127,9 +127,10 @@ int svc_reg_xprt_class(struct svc_xprt_class *); void svc_unreg_xprt_class(struct svc_xprt_class *); void svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *, struct svc_serv *); -int svc_create_xprt(struct svc_serv *, const char *, struct net *, - const int, const unsigned short, int, - const struct cred *); +int svc_xprt_create(struct svc_serv *serv, const char *xprt_name, + struct net *net, const int family, + const unsigned short port, int flags, + const struct cred 
*cred); void svc_xprt_received(struct svc_xprt *xprt); void svc_xprt_enqueue(struct svc_xprt *xprt); void svc_xprt_put(struct svc_xprt *xprt); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 1c2295209d083..44be7193cd9b4 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -285,7 +285,7 @@ void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new) svc_xprt_received(new); } -static int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name, +static int _svc_xprt_create(struct svc_serv *serv, const char *xprt_name, struct net *net, const int family, const unsigned short port, int flags, const struct cred *cred) @@ -321,21 +321,35 @@ static int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name, return -EPROTONOSUPPORT; } -int svc_create_xprt(struct svc_serv *serv, const char *xprt_name, +/** + * svc_xprt_create - Add a new listener to @serv + * @serv: target RPC service + * @xprt_name: transport class name + * @net: network namespace + * @family: network address family + * @port: listener port + * @flags: SVC_SOCK flags + * @cred: credential to bind to this transport + * + * Return values: + * %0: New listener added successfully + * %-EPROTONOSUPPORT: Requested transport type not supported + */ +int svc_xprt_create(struct svc_serv *serv, const char *xprt_name, struct net *net, const int family, const unsigned short port, int flags, const struct cred *cred) { int err; - err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred); + err = _svc_xprt_create(serv, xprt_name, net, family, port, flags, cred); if (err == -EPROTONOSUPPORT) { request_module("svc%s", xprt_name); - err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred); + err = _svc_xprt_create(serv, xprt_name, net, family, port, flags, cred); } return err; } -EXPORT_SYMBOL_GPL(svc_create_xprt); +EXPORT_SYMBOL_GPL(svc_xprt_create); /* * Copy the local and remote xprt addresses to the rqstp structure -- GitLab From 
4355d767a21b9445958fc11bce9a9701f76529d3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 31 Jan 2022 13:34:29 -0500 Subject: [PATCH 0851/1586] SUNRPC: Rename svc_close_xprt() Clean up: Use the "svc_xprt_" function naming convention as is used for other external APIs. Signed-off-by: Chuck Lever --- fs/nfsd/nfsctl.c | 2 +- include/linux/sunrpc/svc_xprt.h | 2 +- net/sunrpc/svc.c | 2 +- net/sunrpc/svc_xprt.c | 9 +++++++-- net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 2 +- 5 files changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 8fec779994f7b..16920e4512bde 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -790,7 +790,7 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cr out_close: xprt = svc_find_xprt(nn->nfsd_serv, transport, net, PF_INET, port); if (xprt != NULL) { - svc_close_xprt(xprt); + svc_xprt_close(xprt); svc_xprt_put(xprt); } out_err: diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index a7f6f17c3dc5e..bf7d029fb48c0 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -135,7 +135,7 @@ void svc_xprt_received(struct svc_xprt *xprt); void svc_xprt_enqueue(struct svc_xprt *xprt); void svc_xprt_put(struct svc_xprt *xprt); void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt); -void svc_close_xprt(struct svc_xprt *xprt); +void svc_xprt_close(struct svc_xprt *xprt); int svc_port_is_privileged(struct sockaddr *sin); int svc_print_xprts(char *buf, int maxlen); struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 74a75a22da9a5..53efef3db3a9c 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1352,7 +1352,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) svc_authorise(rqstp); close_xprt: if (rqstp->rq_xprt && test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags)) - 
svc_close_xprt(rqstp->rq_xprt); + svc_xprt_close(rqstp->rq_xprt); dprintk("svc: svc_process close\n"); return 0; diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 44be7193cd9b4..6809116c996a6 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1068,7 +1068,12 @@ static void svc_delete_xprt(struct svc_xprt *xprt) svc_xprt_put(xprt); } -void svc_close_xprt(struct svc_xprt *xprt) +/** + * svc_xprt_close - Close a client connection + * @xprt: transport to disconnect + * + */ +void svc_xprt_close(struct svc_xprt *xprt) { trace_svc_xprt_close(xprt); set_bit(XPT_CLOSE, &xprt->xpt_flags); @@ -1083,7 +1088,7 @@ void svc_close_xprt(struct svc_xprt *xprt) */ svc_delete_xprt(xprt); } -EXPORT_SYMBOL_GPL(svc_close_xprt); +EXPORT_SYMBOL_GPL(svc_xprt_close); static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net) { diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index 16897fcb659c1..85c8cdda98b18 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -198,7 +198,7 @@ static int xprt_rdma_bc_send_request(struct rpc_rqst *rqst) ret = rpcrdma_bc_send_request(rdma, rqst); if (ret == -ENOTCONN) - svc_close_xprt(sxprt); + svc_xprt_close(sxprt); return ret; } -- GitLab From c7d7ec8f043e53ad16e30f5ebb8b9df415ec0f2b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 26 Jan 2022 11:30:55 -0500 Subject: [PATCH 0852/1586] SUNRPC: Remove svc_shutdown_net() Clean up: svc_shutdown_net() now does nothing but call svc_close_net(). Replace all external call sites. svc_close_net() is renamed to be the inverse of svc_xprt_create(). 
Signed-off-by: Chuck Lever --- fs/lockd/svc.c | 4 ++-- fs/nfs/callback.c | 2 +- fs/nfsd/nfssvc.c | 2 +- include/linux/sunrpc/svc.h | 1 - include/linux/sunrpc/svc_xprt.h | 1 + net/sunrpc/svc.c | 6 ------ net/sunrpc/svc_xprt.c | 9 +++++++-- 7 files changed, 12 insertions(+), 13 deletions(-) diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index bba6f2b45b64a..c83ec4a375bc1 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -248,7 +248,7 @@ out_err: if (warned++ == 0) printk(KERN_WARNING "lockd_up: makesock failed, error=%d\n", err); - svc_shutdown_net(serv, net); + svc_xprt_destroy_all(serv, net); svc_rpcb_cleanup(serv, net); return err; } @@ -287,7 +287,7 @@ static void lockd_down_net(struct svc_serv *serv, struct net *net) nlm_shutdown_hosts_net(net); cancel_delayed_work_sync(&ln->grace_period_end); locks_end_grace(&ln->lockd_manager); - svc_shutdown_net(serv, net); + svc_xprt_destroy_all(serv, net); svc_rpcb_cleanup(serv, net); } } else { diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index c1a8767100ae9..c98c68513590f 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -189,7 +189,7 @@ static void nfs_callback_down_net(u32 minorversion, struct svc_serv *serv, struc return; dprintk("NFS: destroy per-net callback data; net=%x\n", net->ns.inum); - svc_shutdown_net(serv, net); + svc_xprt_destroy_all(serv, net); } static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index ae25b7b3af99e..b92d272f4ba6b 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -722,7 +722,7 @@ void nfsd_put(struct net *net) struct nfsd_net *nn = net_generic(net, nfsd_net_id); if (kref_put(&nn->nfsd_serv->sv_refcnt, nfsd_noop)) { - svc_shutdown_net(nn->nfsd_serv, net); + svc_xprt_destroy_all(nn->nfsd_serv, net); nfsd_last_thread(nn->nfsd_serv, net); svc_destroy(&nn->nfsd_serv->sv_refcnt); spin_lock(&nfsd_notifier_lock); diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 
63794d772eb33..5603158b2aa71 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -508,7 +508,6 @@ struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, const struct svc_serv_ops *); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); int svc_pool_stats_open(struct svc_serv *serv, struct file *file); -void svc_shutdown_net(struct svc_serv *, struct net *); int svc_process(struct svc_rqst *); int bc_svc_process(struct svc_serv *, struct rpc_rqst *, struct svc_rqst *); diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index bf7d029fb48c0..42e113742429b 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -131,6 +131,7 @@ int svc_xprt_create(struct svc_serv *serv, const char *xprt_name, struct net *net, const int family, const unsigned short port, int flags, const struct cred *cred); +void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net); void svc_xprt_received(struct svc_xprt *xprt); void svc_xprt_enqueue(struct svc_xprt *xprt); void svc_xprt_put(struct svc_xprt *xprt); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 53efef3db3a9c..08d6847464529 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -536,12 +536,6 @@ out_err: } EXPORT_SYMBOL_GPL(svc_create_pooled); -void svc_shutdown_net(struct svc_serv *serv, struct net *net) -{ - svc_close_net(serv, net); -} -EXPORT_SYMBOL_GPL(svc_shutdown_net); - /* * Destroy an RPC service. Should be called with appropriate locking to * protect sv_permsocks and sv_tempsocks. 
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 6809116c996a6..0c117d3bfda80 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1140,7 +1140,11 @@ static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net) } } -/* +/** + * svc_xprt_destroy_all - Destroy transports associated with @serv + * @serv: RPC service to be shut down + * @net: target network namespace + * * Server threads may still be running (especially in the case where the * service is still running in other network namespaces). * @@ -1152,7 +1156,7 @@ static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net) * threads, we may need to wait a little while and then check again to * see if they're done. */ -void svc_close_net(struct svc_serv *serv, struct net *net) +void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net) { int delay = 0; @@ -1163,6 +1167,7 @@ void svc_close_net(struct svc_serv *serv, struct net *net) msleep(delay++); } } +EXPORT_SYMBOL_GPL(svc_xprt_destroy_all); /* * Handle defer and revisit of requests -- GitLab From f49169c97fceb21ad6a0aaf671c50b0f520f15a5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 16 Feb 2022 12:31:09 -0500 Subject: [PATCH 0853/1586] NFSD: Remove svc_serv_ops::svo_module struct svc_serv_ops is about to be removed. Neil Brown says: > I suspect svo_module can go as well - I don't think the thread is > ever the thing that primarily keeps a module active. A random sample of kthread_create() callers shows sunrpc is the only one that manages module reference count in this way. 
Suggested-by: Neil Brown Signed-off-by: Chuck Lever --- fs/lockd/svc.c | 4 +--- fs/nfs/callback.c | 7 ++----- fs/nfs/nfs4state.c | 1 - fs/nfsd/nfssvc.c | 3 --- include/linux/sunrpc/svc.h | 5 ----- kernel/module.c | 2 +- net/sunrpc/svc.c | 2 -- 7 files changed, 4 insertions(+), 20 deletions(-) diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index c83ec4a375bc1..bfde31124f3af 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -184,8 +184,7 @@ lockd(void *vrqstp) dprintk("lockd_down: service stopped\n"); svc_exit_thread(rqstp); - - module_put_and_kthread_exit(0); + return 0; } static int create_lockd_listener(struct svc_serv *serv, const char *name, @@ -352,7 +351,6 @@ static struct notifier_block lockd_inet6addr_notifier = { static const struct svc_serv_ops lockd_sv_ops = { .svo_function = lockd, - .svo_module = THIS_MODULE, }; static int lockd_get(void) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index c98c68513590f..a494f9e7bd0a0 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include @@ -92,8 +91,8 @@ nfs4_callback_svc(void *vrqstp) continue; svc_process(rqstp); } + svc_exit_thread(rqstp); - module_put_and_kthread_exit(0); return 0; } @@ -136,8 +135,8 @@ nfs41_callback_svc(void *vrqstp) finish_wait(&serv->sv_cb_waitq, &wq); } } + svc_exit_thread(rqstp); - module_put_and_kthread_exit(0); return 0; } @@ -234,12 +233,10 @@ err_bind: static const struct svc_serv_ops nfs40_cb_sv_ops = { .svo_function = nfs4_callback_svc, - .svo_module = THIS_MODULE, }; #if defined(CONFIG_NFS_V4_1) static const struct svc_serv_ops nfs41_cb_sv_ops = { .svo_function = nfs41_callback_svc, - .svo_module = THIS_MODULE, }; static const struct svc_serv_ops *nfs4_cb_sv_ops[] = { diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index f5a62c0d999b4..02a899e4390fa 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2697,6 +2697,5 @@ static int nfs4_run_state_manager(void *ptr) allow_signal(SIGKILL); 
nfs4_state_manager(clp); nfs_put_client(clp); - module_put_and_kthread_exit(0); return 0; } diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index b92d272f4ba6b..544187a8a22bb 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -614,7 +614,6 @@ static int nfsd_get_default_max_blksize(void) static const struct svc_serv_ops nfsd_thread_sv_ops = { .svo_function = nfsd, - .svo_module = THIS_MODULE, }; void nfsd_shutdown_threads(struct net *net) @@ -1018,8 +1017,6 @@ out: msleep(20); } - /* Release module */ - module_put_and_kthread_exit(0); return 0; } diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 5603158b2aa71..dfc9283f412f9 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -57,11 +57,6 @@ struct svc_serv; struct svc_serv_ops { /* function for service threads to run */ int (*svo_function)(void *); - - /* optional module to count when adding threads. - * Thread function must call module_put_and_kthread_exit() to exit. - */ - struct module *svo_module; }; /* diff --git a/kernel/module.c b/kernel/module.c index 46a5c2ed19285..6cea788fd965c 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -335,7 +335,7 @@ static inline void add_taint_module(struct module *mod, unsigned flag, /* * A thread that wants to hold a reference to a module only while it - * is running can call this to safely exit. nfsd and lockd use this. + * is running can call this to safely exit. 
*/ void __noreturn __module_put_and_kthread_exit(struct module *mod, long code) { diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 08d6847464529..a90d555aa163a 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -736,11 +736,9 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) if (IS_ERR(rqstp)) return PTR_ERR(rqstp); - __module_get(serv->sv_ops->svo_module); task = kthread_create_on_node(serv->sv_ops->svo_function, rqstp, node, "%s", serv->sv_name); if (IS_ERR(task)) { - module_put(serv->sv_ops->svo_module); svc_exit_thread(rqstp); return PTR_ERR(task); } -- GitLab From 37902c6313090235c847af89c5515591261ee338 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 16 Feb 2022 12:16:27 -0500 Subject: [PATCH 0854/1586] NFSD: Move svc_serv_ops::svo_function into struct svc_serv Hoist svo_function back into svc_serv and remove struct svc_serv_ops, since the struct is now devoid of fields. Signed-off-by: Chuck Lever --- fs/lockd/svc.c | 6 +----- fs/nfs/callback.c | 43 ++++++++++---------------------------- fs/nfsd/nfssvc.c | 7 +------ include/linux/sunrpc/svc.h | 14 ++++--------- net/sunrpc/svc.c | 37 ++++++++++++++++++++++---------- 5 files changed, 43 insertions(+), 64 deletions(-) diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index bfde31124f3af..59ef8a1f843f3 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -349,10 +349,6 @@ static struct notifier_block lockd_inet6addr_notifier = { }; #endif -static const struct svc_serv_ops lockd_sv_ops = { - .svo_function = lockd, -}; - static int lockd_get(void) { struct svc_serv *serv; @@ -376,7 +372,7 @@ static int lockd_get(void) nlm_timeout = LOCKD_DFLT_TIMEO; nlmsvc_timeout = nlm_timeout * HZ; - serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, &lockd_sv_ops); + serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, lockd); if (!serv) { printk(KERN_WARNING "lockd_up: create service failed\n"); return -ENOMEM; diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 
a494f9e7bd0a0..456af7d230cf1 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -231,29 +231,10 @@ err_bind: return ret; } -static const struct svc_serv_ops nfs40_cb_sv_ops = { - .svo_function = nfs4_callback_svc, -}; -#if defined(CONFIG_NFS_V4_1) -static const struct svc_serv_ops nfs41_cb_sv_ops = { - .svo_function = nfs41_callback_svc, -}; - -static const struct svc_serv_ops *nfs4_cb_sv_ops[] = { - [0] = &nfs40_cb_sv_ops, - [1] = &nfs41_cb_sv_ops, -}; -#else -static const struct svc_serv_ops *nfs4_cb_sv_ops[] = { - [0] = &nfs40_cb_sv_ops, - [1] = NULL, -}; -#endif - static struct svc_serv *nfs_callback_create_svc(int minorversion) { struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; - const struct svc_serv_ops *sv_ops; + int (*threadfn)(void *data); struct svc_serv *serv; /* @@ -262,17 +243,6 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion) if (cb_info->serv) return svc_get(cb_info->serv); - switch (minorversion) { - case 0: - sv_ops = nfs4_cb_sv_ops[0]; - break; - default: - sv_ops = nfs4_cb_sv_ops[1]; - } - - if (sv_ops == NULL) - return ERR_PTR(-ENOTSUPP); - /* * Sanity check: if there's no task, * we should be the first user ... 
@@ -281,7 +251,16 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion) printk(KERN_WARNING "nfs_callback_create_svc: no kthread, %d users??\n", cb_info->users); - serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, sv_ops); + threadfn = nfs4_callback_svc; +#if defined(CONFIG_NFS_V4_1) + if (minorversion) + threadfn = nfs41_callback_svc; +#else + if (minorversion) + return ERR_PTR(-ENOTSUPP); +#endif + serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, + threadfn); if (!serv) { printk(KERN_ERR "nfs_callback_create_svc: create service failed\n"); return ERR_PTR(-ENOMEM); diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 544187a8a22bb..5abbe5d1c77ff 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -612,10 +612,6 @@ static int nfsd_get_default_max_blksize(void) return ret; } -static const struct svc_serv_ops nfsd_thread_sv_ops = { - .svo_function = nfsd, -}; - void nfsd_shutdown_threads(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); @@ -654,8 +650,7 @@ int nfsd_create_serv(struct net *net) if (nfsd_max_blksize == 0) nfsd_max_blksize = nfsd_get_default_max_blksize(); nfsd_reset_versions(nn); - serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, - &nfsd_thread_sv_ops); + serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, nfsd); if (serv == NULL) return -ENOMEM; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index dfc9283f412f9..a5dda4987e8ba 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -52,13 +52,6 @@ struct svc_pool { unsigned long sp_flags; } ____cacheline_aligned_in_smp; -struct svc_serv; - -struct svc_serv_ops { - /* function for service threads to run */ - int (*svo_function)(void *); -}; - /* * RPC service. 
* @@ -91,7 +84,8 @@ struct svc_serv { unsigned int sv_nrpools; /* number of thread pools */ struct svc_pool * sv_pools; /* array of thread pools */ - const struct svc_serv_ops *sv_ops; /* server operations */ + int (*sv_threadfn)(void *data); + #if defined(CONFIG_SUNRPC_BACKCHANNEL) struct list_head sv_cb_list; /* queue for callback requests * that arrive over the same @@ -492,7 +486,7 @@ int svc_rpcb_setup(struct svc_serv *serv, struct net *net); void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net); int svc_bind(struct svc_serv *serv, struct net *net); struct svc_serv *svc_create(struct svc_program *, unsigned int, - const struct svc_serv_ops *); + int (*threadfn)(void *data)); struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node); void svc_rqst_replace_page(struct svc_rqst *rqstp, @@ -500,7 +494,7 @@ void svc_rqst_replace_page(struct svc_rqst *rqstp, void svc_rqst_free(struct svc_rqst *); void svc_exit_thread(struct svc_rqst *); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, - const struct svc_serv_ops *); + int (*threadfn)(void *data)); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); int svc_pool_stats_open(struct svc_serv *serv, struct file *file); int svc_process(struct svc_rqst *); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index a90d555aa163a..5570040175480 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -448,7 +448,7 @@ __svc_init_bc(struct svc_serv *serv) */ static struct svc_serv * __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, - const struct svc_serv_ops *ops) + int (*threadfn)(void *data)) { struct svc_serv *serv; unsigned int vers; @@ -465,7 +465,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, bufsize = RPCSVC_MAXPAYLOAD; serv->sv_max_payload = bufsize? 
bufsize : 4096; serv->sv_max_mesg = roundup(serv->sv_max_payload + PAGE_SIZE, PAGE_SIZE); - serv->sv_ops = ops; + serv->sv_threadfn = threadfn; xdrsize = 0; while (prog) { prog->pg_lovers = prog->pg_nvers-1; @@ -511,22 +511,37 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, return serv; } -struct svc_serv * -svc_create(struct svc_program *prog, unsigned int bufsize, - const struct svc_serv_ops *ops) +/** + * svc_create - Create an RPC service + * @prog: the RPC program the new service will handle + * @bufsize: maximum message size for @prog + * @threadfn: a function to service RPC requests for @prog + * + * Returns an instantiated struct svc_serv object or NULL. + */ +struct svc_serv *svc_create(struct svc_program *prog, unsigned int bufsize, + int (*threadfn)(void *data)) { - return __svc_create(prog, bufsize, /*npools*/1, ops); + return __svc_create(prog, bufsize, 1, threadfn); } EXPORT_SYMBOL_GPL(svc_create); -struct svc_serv * -svc_create_pooled(struct svc_program *prog, unsigned int bufsize, - const struct svc_serv_ops *ops) +/** + * svc_create_pooled - Create an RPC service with pooled threads + * @prog: the RPC program the new service will handle + * @bufsize: maximum message size for @prog + * @threadfn: a function to service RPC requests for @prog + * + * Returns an instantiated struct svc_serv object or NULL. 
+ */ +struct svc_serv *svc_create_pooled(struct svc_program *prog, + unsigned int bufsize, + int (*threadfn)(void *data)) { struct svc_serv *serv; unsigned int npools = svc_pool_map_get(); - serv = __svc_create(prog, bufsize, npools, ops); + serv = __svc_create(prog, bufsize, npools, threadfn); if (!serv) goto out_err; return serv; @@ -736,7 +751,7 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) if (IS_ERR(rqstp)) return PTR_ERR(rqstp); - task = kthread_create_on_node(serv->sv_ops->svo_function, rqstp, + task = kthread_create_on_node(serv->sv_threadfn, rqstp, node, "%s", serv->sv_name); if (IS_ERR(task)) { svc_exit_thread(rqstp); -- GitLab From 74aaf96feaca80285912cc6f19575b3e97177918 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Feb 2022 13:10:52 -0500 Subject: [PATCH 0855/1586] SUNRPC: Teach server to recognize RPC_AUTH_TLS Initial support for the RPC_AUTH_TLS authentication flavor enables NFSD to eventually accept an RPC_AUTH_TLS probe from clients. This patch simply prevents NFSD from rejecting these probes completely. In the meantime, graft this support in now so that RPC_AUTH_TLS support keeps up with generic code and API changes in the RPC server. Down the road, server-side transport implementations will populate xpo_start_tls when they can support RPC-with-TLS. For example, TCP will eventually populate it, but RDMA won't. 
Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_xprt.h | 1 + net/sunrpc/svcauth.c | 2 ++ net/sunrpc/svcauth_unix.c | 60 +++++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+) diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 42e113742429b..20068ccfd0cc0 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -28,6 +28,7 @@ struct svc_xprt_ops { void (*xpo_free)(struct svc_xprt *); void (*xpo_secure_port)(struct svc_rqst *rqstp); void (*xpo_kill_temp_xprt)(struct svc_xprt *); + void (*xpo_start_tls)(struct svc_xprt *); }; struct svc_xprt_class { diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index 5a8b8e03fdd42..e72ba2f13f6c6 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -31,10 +31,12 @@ */ extern struct auth_ops svcauth_null; extern struct auth_ops svcauth_unix; +extern struct auth_ops svcauth_tls; static struct auth_ops __rcu *authtab[RPC_AUTH_MAXFLAVOR] = { [RPC_AUTH_NULL] = (struct auth_ops __force __rcu *)&svcauth_null, [RPC_AUTH_UNIX] = (struct auth_ops __force __rcu *)&svcauth_unix, + [RPC_AUTH_TLS] = (struct auth_ops __force __rcu *)&svcauth_tls, }; static struct auth_ops * diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index d7ed7d49115ac..b1efc34db6ed8 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -37,6 +37,7 @@ struct unix_domain { extern struct auth_ops svcauth_null; extern struct auth_ops svcauth_unix; +extern struct auth_ops svcauth_tls; static void svcauth_unix_domain_release_rcu(struct rcu_head *head) { @@ -788,6 +789,65 @@ struct auth_ops svcauth_null = { }; +static int +svcauth_tls_accept(struct svc_rqst *rqstp) +{ + struct svc_cred *cred = &rqstp->rq_cred; + struct kvec *argv = rqstp->rq_arg.head; + struct kvec *resv = rqstp->rq_res.head; + + if (argv->iov_len < XDR_UNIT * 3) + return SVC_GARBAGE; + + /* Call's cred length */ + if (svc_getu32(argv) != xdr_zero) { + rqstp->rq_auth_stat = 
rpc_autherr_badcred; + return SVC_DENIED; + } + + /* Call's verifier flavor and its length */ + if (svc_getu32(argv) != rpc_auth_null || + svc_getu32(argv) != xdr_zero) { + rqstp->rq_auth_stat = rpc_autherr_badverf; + return SVC_DENIED; + } + + /* AUTH_TLS is not valid on non-NULL procedures */ + if (rqstp->rq_proc != 0) { + rqstp->rq_auth_stat = rpc_autherr_badcred; + return SVC_DENIED; + } + + /* Mapping to nobody uid/gid is required */ + cred->cr_uid = INVALID_UID; + cred->cr_gid = INVALID_GID; + cred->cr_group_info = groups_alloc(0); + if (cred->cr_group_info == NULL) + return SVC_CLOSE; /* kmalloc failure - client must retry */ + + /* Reply's verifier */ + svc_putnl(resv, RPC_AUTH_NULL); + if (rqstp->rq_xprt->xpt_ops->xpo_start_tls) { + svc_putnl(resv, 8); + memcpy(resv->iov_base + resv->iov_len, "STARTTLS", 8); + resv->iov_len += 8; + } else + svc_putnl(resv, 0); + + rqstp->rq_cred.cr_flavor = RPC_AUTH_TLS; + return SVC_OK; +} + +struct auth_ops svcauth_tls = { + .name = "tls", + .owner = THIS_MODULE, + .flavour = RPC_AUTH_TLS, + .accept = svcauth_tls_accept, + .release = svcauth_null_release, + .set_client = svcauth_unix_set_client, +}; + + static int svcauth_unix_accept(struct svc_rqst *rqstp) { -- GitLab From 4d2eeafecd6c83b4444db3dc0ada201c89b1aa44 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 24 Feb 2022 18:17:05 +0200 Subject: [PATCH 0856/1586] nfsd: more robust allocation failure handling in nfsd_file_cache_init The nfsd file cache table can be pretty large and its allocation may require as many as 80 contiguous pages. Employ the same fix that was employed for a similar issue that was reported for the reply cache hash table allocation several years ago by commit 8f97514b423a ("nfsd: more robust allocation failure handling in nfsd_reply_cache_init").
Fixes: 65294c1f2c5e ("nfsd: add a new struct file caching facility to nfsd") Link: https://lore.kernel.org/linux-nfs/e3cdaeec85a6cfec980e87fc294327c0381c1778.camel@kernel.org/ Suggested-by: Jeff Layton Signed-off-by: Amir Goldstein Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Tested-by: Amir Goldstein --- fs/nfsd/filecache.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 8bc807c5fea4c..cc2831cec6695 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -632,7 +632,7 @@ nfsd_file_cache_init(void) if (!nfsd_filecache_wq) goto out; - nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE, + nfsd_file_hashtbl = kvcalloc(NFSD_FILE_HASH_SIZE, sizeof(*nfsd_file_hashtbl), GFP_KERNEL); if (!nfsd_file_hashtbl) { pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n"); @@ -700,7 +700,7 @@ out_err: nfsd_file_slab = NULL; kmem_cache_destroy(nfsd_file_mark_slab); nfsd_file_mark_slab = NULL; - kfree(nfsd_file_hashtbl); + kvfree(nfsd_file_hashtbl); nfsd_file_hashtbl = NULL; destroy_workqueue(nfsd_filecache_wq); nfsd_filecache_wq = NULL; @@ -811,7 +811,7 @@ nfsd_file_cache_shutdown(void) fsnotify_wait_marks_destroyed(); kmem_cache_destroy(nfsd_file_mark_slab); nfsd_file_mark_slab = NULL; - kfree(nfsd_file_hashtbl); + kvfree(nfsd_file_hashtbl); nfsd_file_hashtbl = NULL; destroy_workqueue(nfsd_filecache_wq); nfsd_filecache_wq = NULL; -- GitLab From 70868c6b8fd80db585da57a264c50a69af8fd3c3 Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Mon, 28 Feb 2022 10:56:41 +0800 Subject: [PATCH 0857/1586] docs: fix 'make htmldocs' warning in SCTP.rst Fix following 'make htmldocs' warnings: ./Documentation/security/SCTP.rst:123: WARNING: Title underline too short. security_sctp_assoc_established() ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ./Documentation/security/SCTP.rst:123: WARNING: Title underline too short. 
security_sctp_assoc_established() ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ./Documentation/security/SCTP.rst:273: WARNING: Title underline too short. security_sctp_assoc_established() ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ./Documentation/security/SCTP.rst:273: WARNING: Title underline too short. security_sctp_assoc_established() ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fixes: 5e50f5d4ff31 ("security: add sctp_assoc_established hook") Signed-off-by: Wan Jiabing Reviewed-by: Xin Long Signed-off-by: Paul Moore --- Documentation/security/SCTP.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/security/SCTP.rst b/Documentation/security/SCTP.rst index 406cc68b88087..b73eb764a0017 100644 --- a/Documentation/security/SCTP.rst +++ b/Documentation/security/SCTP.rst @@ -120,7 +120,7 @@ calls **sctp_peeloff**\(3). security_sctp_assoc_established() -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Called when a COOKIE ACK is received, and the peer secid will be saved into ``@asoc->peer_secid`` for client:: @@ -270,7 +270,7 @@ sockets sid and peer sid to that contained in the ``@asoc sid`` and security_sctp_assoc_established() -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Called when a COOKIE ACK is received where it sets the connection's peer sid to that in ``@skb``:: -- GitLab From a5cd1ab7ab679d252a6d2f483eee7d45ebf2040c Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Mon, 28 Feb 2022 15:45:32 -0800 Subject: [PATCH 0858/1586] Fix incorrect type in assignment of ipv6 port for audit Remove inappropriate use of ntohs() and assign the port value directly. 
Reported-by: kernel test robot Signed-off-by: Casey Schaufler --- security/smack/smack_lsm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 14b279cc75c96..6207762dbdb13 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -2510,7 +2510,7 @@ static int smk_ipv6_check(struct smack_known *subject, #ifdef CONFIG_AUDIT smk_ad_init_net(&ad, __func__, LSM_AUDIT_DATA_NET, &net); ad.a.u.net->family = PF_INET6; - ad.a.u.net->dport = ntohs(address->sin6_port); + ad.a.u.net->dport = address->sin6_port; if (act == SMK_RECEIVING) ad.a.u.net->v6info.saddr = address->sin6_addr; else -- GitLab From 4a48b4c428dc92b5e0b19de83e7eb8d530dddd48 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Wed, 23 Feb 2022 11:41:18 -0500 Subject: [PATCH 0859/1586] MAINTAINERS: add missing security/integrity/platform_certs Define a new KEYS/KEYRINGS_INTEGRITY record so that any changes to platform_certs/ are posted on the linux-integrity mailing list as well. 
Reviewed-by: Jarkko Sakkinen Signed-off-by: Mimi Zohar --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index fdf0420ba477e..6328cd4535bcf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10660,6 +10660,14 @@ F: include/linux/keyctl.h F: include/uapi/linux/keyctl.h F: security/keys/ +KEYS/KEYRINGS_INTEGRITY +M: Jarkko Sakkinen +M: Mimi Zohar +L: linux-integrity@vger.kernel.org +L: keyrings@vger.kernel.org +S: Supported +F: security/integrity/platform_certs + KFENCE M: Alexander Potapenko M: Marco Elver -- GitLab From 9feaf8b387ee0ece9c1d7add308776b502a35d0c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 28 Feb 2022 20:18:51 -0800 Subject: [PATCH 0860/1586] efi: fix return value of __setup handlers When "dump_apple_properties" is used on the kernel boot command line, it causes an Unknown parameter message and the string is added to init's argument strings: Unknown kernel command line parameters "dump_apple_properties BOOT_IMAGE=/boot/bzImage-517rc6 efivar_ssdt=newcpu_ssdt", will be passed to user space. Run /sbin/init as init process with arguments: /sbin/init dump_apple_properties with environment: HOME=/ TERM=linux BOOT_IMAGE=/boot/bzImage-517rc6 efivar_ssdt=newcpu_ssdt Similarly when "efivar_ssdt=somestring" is used, it is added to the Unknown parameter message and to init's environment strings, polluting them (see examples above). Change the return value of the __setup functions to 1 to indicate that the __setup options have been handled. Fixes: 58c5475aba67 ("x86/efi: Retrieve and assign Apple device properties") Fixes: 475fb4e8b2f4 ("efi / ACPI: load SSTDs from EFI variables") Signed-off-by: Randy Dunlap Reported-by: Igor Zhbanov Link: lore.kernel.org/r/64644a2f-4a20-bab3-1e15-3b2cdd0defe3@omprussia.ru Cc: Ard Biesheuvel Cc: linux-efi@vger.kernel.org Cc: Lukas Wunner Cc: Octavian Purdila Cc: "Rafael J. 
Wysocki" Cc: Matt Fleming Link: https://lore.kernel.org/r/20220301041851.12459-1-rdunlap@infradead.org Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/apple-properties.c | 2 +- drivers/firmware/efi/efi.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/apple-properties.c b/drivers/firmware/efi/apple-properties.c index 4c3201e290e29..ea84108035eb0 100644 --- a/drivers/firmware/efi/apple-properties.c +++ b/drivers/firmware/efi/apple-properties.c @@ -24,7 +24,7 @@ static bool dump_properties __initdata; static int __init dump_properties_enable(char *arg) { dump_properties = true; - return 0; + return 1; } __setup("dump_apple_properties", dump_properties_enable); diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 7de3f5b6e8d0a..5502e176d51be 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -212,7 +212,7 @@ static int __init efivar_ssdt_setup(char *str) memcpy(efivar_ssdt, str, strlen(str)); else pr_warn("efivar_ssdt: name too long: %s\n", str); - return 0; + return 1; } __setup("efivar_ssdt=", efivar_ssdt_setup); -- GitLab From 8d4c998919320206f8832dc413e23fdd27ef2274 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 24 Feb 2022 10:12:22 +0000 Subject: [PATCH 0861/1586] irqchip/qcom-pdc: Kill PDC_NO_PARENT_IRQ PDC_NO_PARENT_IRQ is pretty pointless, as all it indicates is that the PDC terminates the interrupt hierarchy. Which is exactly the same as not having a mapping in the GIC space. This is also bad practice to treat the absence of a hwirq as a hwirq itself. Just explicitly use the region mapping pointer, and drop the definition. 
Signed-off-by: Marc Zyngier Reviewed-by: Maulik Shah Link: https://lore.kernel.org/r/20220224101226.88373-2-maz@kernel.org --- drivers/irqchip/qcom-pdc.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c index 173e6520e06ec..3b214c4e6755a 100644 --- a/drivers/irqchip/qcom-pdc.c +++ b/drivers/irqchip/qcom-pdc.c @@ -30,14 +30,14 @@ #define IRQ_ENABLE_BANK 0x10 #define IRQ_i_CFG 0x110 -#define PDC_NO_PARENT_IRQ ~0UL - struct pdc_pin_region { u32 pin_base; u32 parent_base; u32 cnt; }; +#define pin_to_hwirq(r, p) ((r)->parent_base + (p) - (r)->pin_base) + static DEFINE_RAW_SPINLOCK(pdc_lock); static void __iomem *pdc_base; static struct pdc_pin_region *pdc_region; @@ -186,19 +186,17 @@ static struct irq_chip qcom_pdc_gic_chip = { .irq_set_affinity = irq_chip_set_affinity_parent, }; -static irq_hw_number_t get_parent_hwirq(int pin) +static struct pdc_pin_region *get_pin_region(int pin) { int i; - struct pdc_pin_region *region; for (i = 0; i < pdc_region_cnt; i++) { - region = &pdc_region[i]; - if (pin >= region->pin_base && - pin < region->pin_base + region->cnt) - return (region->parent_base + pin - region->pin_base); + if (pin >= pdc_region[i].pin_base && + pin < pdc_region[i].pin_base + pdc_region[i].cnt) + return &pdc_region[i]; } - return PDC_NO_PARENT_IRQ; + return NULL; } static int qcom_pdc_translate(struct irq_domain *d, struct irq_fwspec *fwspec, @@ -221,7 +219,8 @@ static int qcom_pdc_alloc(struct irq_domain *domain, unsigned int virq, { struct irq_fwspec *fwspec = data; struct irq_fwspec parent_fwspec; - irq_hw_number_t hwirq, parent_hwirq; + struct pdc_pin_region *region; + irq_hw_number_t hwirq; unsigned int type; int ret; @@ -234,8 +233,8 @@ static int qcom_pdc_alloc(struct irq_domain *domain, unsigned int virq, if (ret) return ret; - parent_hwirq = get_parent_hwirq(hwirq); - if (parent_hwirq == PDC_NO_PARENT_IRQ) + region = get_pin_region(hwirq); + 
if (!region) return irq_domain_disconnect_hierarchy(domain->parent, virq); if (type & IRQ_TYPE_EDGE_BOTH) @@ -247,7 +246,7 @@ static int qcom_pdc_alloc(struct irq_domain *domain, unsigned int virq, parent_fwspec.fwnode = domain->parent->fwnode; parent_fwspec.param_count = 3; parent_fwspec.param[0] = 0; - parent_fwspec.param[1] = parent_hwirq; + parent_fwspec.param[1] = pin_to_hwirq(region, hwirq); parent_fwspec.param[2] = type; return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, @@ -265,7 +264,8 @@ static int qcom_pdc_gpio_alloc(struct irq_domain *domain, unsigned int virq, { struct irq_fwspec *fwspec = data; struct irq_fwspec parent_fwspec; - irq_hw_number_t hwirq, parent_hwirq; + struct pdc_pin_region *region; + irq_hw_number_t hwirq; unsigned int type; int ret; @@ -281,8 +281,8 @@ static int qcom_pdc_gpio_alloc(struct irq_domain *domain, unsigned int virq, if (ret) return ret; - parent_hwirq = get_parent_hwirq(hwirq); - if (parent_hwirq == PDC_NO_PARENT_IRQ) + region = get_pin_region(hwirq); + if (!region) return irq_domain_disconnect_hierarchy(domain->parent, virq); if (type & IRQ_TYPE_EDGE_BOTH) @@ -294,7 +294,7 @@ static int qcom_pdc_gpio_alloc(struct irq_domain *domain, unsigned int virq, parent_fwspec.fwnode = domain->parent->fwnode; parent_fwspec.param_count = 3; parent_fwspec.param[0] = 0; - parent_fwspec.param[1] = parent_hwirq; + parent_fwspec.param[1] = pin_to_hwirq(region, hwirq); parent_fwspec.param[2] = type; return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, -- GitLab From 4dc70713dc24dceeea7f106828674744a6294860 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 24 Feb 2022 10:12:23 +0000 Subject: [PATCH 0862/1586] irqchip/qcom-pdc: Kill non-wakeup irqdomain A careful look at the way the PDC driver works shows that: - all interrupts are in the same space - all interrupts are treated the same And yet the driver creates two domains based on whether the interrupt gets mapped directly or from the pinctrl code, which is obviously a 
waste of resources. Kill the non-wakeup domain and unify all the interrupt handling. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220224101226.88373-3-maz@kernel.org --- drivers/irqchip/qcom-pdc.c | 84 +++++--------------------------------- 1 file changed, 10 insertions(+), 74 deletions(-) diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c index 3b214c4e6755a..5be531403f500 100644 --- a/drivers/irqchip/qcom-pdc.c +++ b/drivers/irqchip/qcom-pdc.c @@ -21,7 +21,6 @@ #include #include -#define PDC_MAX_IRQS 168 #define PDC_MAX_GPIO_IRQS 256 #define CLEAR_INTR(reg, intr) (reg & ~(1 << intr)) @@ -224,51 +223,6 @@ static int qcom_pdc_alloc(struct irq_domain *domain, unsigned int virq, unsigned int type; int ret; - ret = qcom_pdc_translate(domain, fwspec, &hwirq, &type); - if (ret) - return ret; - - ret = irq_domain_set_hwirq_and_chip(domain, virq, hwirq, - &qcom_pdc_gic_chip, NULL); - if (ret) - return ret; - - region = get_pin_region(hwirq); - if (!region) - return irq_domain_disconnect_hierarchy(domain->parent, virq); - - if (type & IRQ_TYPE_EDGE_BOTH) - type = IRQ_TYPE_EDGE_RISING; - - if (type & IRQ_TYPE_LEVEL_MASK) - type = IRQ_TYPE_LEVEL_HIGH; - - parent_fwspec.fwnode = domain->parent->fwnode; - parent_fwspec.param_count = 3; - parent_fwspec.param[0] = 0; - parent_fwspec.param[1] = pin_to_hwirq(region, hwirq); - parent_fwspec.param[2] = type; - - return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, - &parent_fwspec); -} - -static const struct irq_domain_ops qcom_pdc_ops = { - .translate = qcom_pdc_translate, - .alloc = qcom_pdc_alloc, - .free = irq_domain_free_irqs_common, -}; - -static int qcom_pdc_gpio_alloc(struct irq_domain *domain, unsigned int virq, - unsigned int nr_irqs, void *data) -{ - struct irq_fwspec *fwspec = data; - struct irq_fwspec parent_fwspec; - struct pdc_pin_region *region; - irq_hw_number_t hwirq; - unsigned int type; - int ret; - ret = qcom_pdc_translate(domain, fwspec, &hwirq, &type); if (ret) return 
ret; @@ -301,16 +255,9 @@ static int qcom_pdc_gpio_alloc(struct irq_domain *domain, unsigned int virq, &parent_fwspec); } -static int qcom_pdc_gpio_domain_select(struct irq_domain *d, - struct irq_fwspec *fwspec, - enum irq_domain_bus_token bus_token) -{ - return bus_token == DOMAIN_BUS_WAKEUP; -} - -static const struct irq_domain_ops qcom_pdc_gpio_ops = { - .select = qcom_pdc_gpio_domain_select, - .alloc = qcom_pdc_gpio_alloc, +static const struct irq_domain_ops qcom_pdc_ops = { + .translate = qcom_pdc_translate, + .alloc = qcom_pdc_alloc, .free = irq_domain_free_irqs_common, }; @@ -361,7 +308,7 @@ static int pdc_setup_pin_mapping(struct device_node *np) static int qcom_pdc_init(struct device_node *node, struct device_node *parent) { - struct irq_domain *parent_domain, *pdc_domain, *pdc_gpio_domain; + struct irq_domain *parent_domain, *pdc_domain; int ret; pdc_base = of_iomap(node, 0); @@ -383,32 +330,21 @@ static int qcom_pdc_init(struct device_node *node, struct device_node *parent) goto fail; } - pdc_domain = irq_domain_create_hierarchy(parent_domain, 0, PDC_MAX_IRQS, - of_fwnode_handle(node), - &qcom_pdc_ops, NULL); - if (!pdc_domain) { - pr_err("%pOF: GIC domain add failed\n", node); - ret = -ENOMEM; - goto fail; - } - - pdc_gpio_domain = irq_domain_create_hierarchy(parent_domain, + pdc_domain = irq_domain_create_hierarchy(parent_domain, IRQ_DOMAIN_FLAG_QCOM_PDC_WAKEUP, PDC_MAX_GPIO_IRQS, of_fwnode_handle(node), - &qcom_pdc_gpio_ops, NULL); - if (!pdc_gpio_domain) { - pr_err("%pOF: PDC domain add failed for GPIO domain\n", node); + &qcom_pdc_ops, NULL); + if (!pdc_domain) { + pr_err("%pOF: PDC domain add failed\n", node); ret = -ENOMEM; - goto remove; + goto fail; } - irq_domain_update_bus_token(pdc_gpio_domain, DOMAIN_BUS_WAKEUP); + irq_domain_update_bus_token(pdc_domain, DOMAIN_BUS_WAKEUP); return 0; -remove: - irq_domain_remove(pdc_domain); fail: kfree(pdc_region); iounmap(pdc_base); -- GitLab From d494d088ac44b9cf561362a7856fa20b656be64f Mon Sep 17 
00:00:00 2001 From: Marc Zyngier Date: Thu, 24 Feb 2022 10:12:24 +0000 Subject: [PATCH 0863/1586] irqchip/qcom-pdc: Kill qcom_pdc_translate helper qcom_pdc_translate() really is nothing but an open coded version of irq_domain_translate_twocell(). Get rid of it and use the common version instead. Signed-off-by: Marc Zyngier Reviewed-by: Maulik Shah Link: https://lore.kernel.org/r/20220224101226.88373-4-maz@kernel.org --- drivers/irqchip/qcom-pdc.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c index 5be531403f500..837ca6998f6af 100644 --- a/drivers/irqchip/qcom-pdc.c +++ b/drivers/irqchip/qcom-pdc.c @@ -198,21 +198,6 @@ static struct pdc_pin_region *get_pin_region(int pin) return NULL; } -static int qcom_pdc_translate(struct irq_domain *d, struct irq_fwspec *fwspec, - unsigned long *hwirq, unsigned int *type) -{ - if (is_of_node(fwspec->fwnode)) { - if (fwspec->param_count != 2) - return -EINVAL; - - *hwirq = fwspec->param[0]; - *type = fwspec->param[1] & IRQ_TYPE_SENSE_MASK; - return 0; - } - - return -EINVAL; -} - static int qcom_pdc_alloc(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs, void *data) { @@ -223,7 +208,7 @@ static int qcom_pdc_alloc(struct irq_domain *domain, unsigned int virq, unsigned int type; int ret; - ret = qcom_pdc_translate(domain, fwspec, &hwirq, &type); + ret = irq_domain_translate_twocell(domain, fwspec, &hwirq, &type); if (ret) return ret; @@ -256,7 +241,7 @@ static int qcom_pdc_alloc(struct irq_domain *domain, unsigned int virq, } static const struct irq_domain_ops qcom_pdc_ops = { - .translate = qcom_pdc_translate, + .translate = irq_domain_translate_twocell, .alloc = qcom_pdc_alloc, .free = irq_domain_free_irqs_common, }; -- GitLab From a6aca2f460e203781dc41391913cc5b54f4bc0ce Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 24 Feb 2022 10:12:25 +0000 Subject: [PATCH 0864/1586] irqchip/qcom-pdc: Fix broken locking 
pdc_enable_intr() serves as a primitive to qcom_pdc_gic_{en,dis}able, and has a raw spinlock for mutual exclusion, which it uses with interruptible primitives. This means that this critical section can itself be interrupted. Should the interrupt also be a PDC interrupt, and the endpoint driver perform an irq_disable() on that interrupt, we end up in a deadlock. Fix this by using the irqsave/irqrestore variants of the locking primitives. Signed-off-by: Marc Zyngier Reviewed-by: Maulik Shah Link: https://lore.kernel.org/r/20220224101226.88373-5-maz@kernel.org --- drivers/irqchip/qcom-pdc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c index 837ca6998f6af..0cd20ddfae2a1 100644 --- a/drivers/irqchip/qcom-pdc.c +++ b/drivers/irqchip/qcom-pdc.c @@ -55,17 +55,18 @@ static u32 pdc_reg_read(int reg, u32 i) static void pdc_enable_intr(struct irq_data *d, bool on) { int pin_out = d->hwirq; + unsigned long flags; u32 index, mask; u32 enable; index = pin_out / 32; mask = pin_out % 32; - raw_spin_lock(&pdc_lock); + raw_spin_lock_irqsave(&pdc_lock, flags); enable = pdc_reg_read(IRQ_ENABLE_BANK, index); enable = on ? ENABLE_INTR(enable, mask) : CLEAR_INTR(enable, mask); pdc_reg_write(IRQ_ENABLE_BANK, index, enable); - raw_spin_unlock(&pdc_lock); + raw_spin_unlock_irqrestore(&pdc_lock, flags); } static void qcom_pdc_gic_disable(struct irq_data *d) -- GitLab From d2febf6bbec5466824432e3d8850fc49e4343572 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 24 Feb 2022 10:12:26 +0000 Subject: [PATCH 0865/1586] irqchip/qcom-pdc: Drop open coded version of __assign_bit() The driver uses what looks like an open-coded version of __assign_bit(). Replace it with the real thing.
Signed-off-by: Marc Zyngier Reviewed-by: Maulik Shah Link: https://lore.kernel.org/r/20220224101226.88373-6-maz@kernel.org --- drivers/irqchip/qcom-pdc.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c index 0cd20ddfae2a1..d96916cf6a414 100644 --- a/drivers/irqchip/qcom-pdc.c +++ b/drivers/irqchip/qcom-pdc.c @@ -23,9 +23,6 @@ #define PDC_MAX_GPIO_IRQS 256 -#define CLEAR_INTR(reg, intr) (reg & ~(1 << intr)) -#define ENABLE_INTR(reg, intr) (reg | (1 << intr)) - #define IRQ_ENABLE_BANK 0x10 #define IRQ_i_CFG 0x110 @@ -55,16 +52,16 @@ static u32 pdc_reg_read(int reg, u32 i) static void pdc_enable_intr(struct irq_data *d, bool on) { int pin_out = d->hwirq; + unsigned long enable; unsigned long flags; u32 index, mask; - u32 enable; index = pin_out / 32; mask = pin_out % 32; raw_spin_lock_irqsave(&pdc_lock, flags); enable = pdc_reg_read(IRQ_ENABLE_BANK, index); - enable = on ? ENABLE_INTR(enable, mask) : CLEAR_INTR(enable, mask); + __assign_bit(mask, &enable, on); pdc_reg_write(IRQ_ENABLE_BANK, index, enable); raw_spin_unlock_irqrestore(&pdc_lock, flags); } -- GitLab From e414c25e3399b2b3d7337dc47abccab5c71b7c8f Mon Sep 17 00:00:00 2001 From: "Souptick Joarder (HPE)" Date: Fri, 18 Feb 2022 22:03:03 +0530 Subject: [PATCH 0866/1586] irqchip/nvic: Release nvic_base upon failure smatch warning was reported as below -> smatch warnings: drivers/irqchip/irq-nvic.c:131 nvic_of_init() warn: 'nvic_base' not released on lines: 97. Release nvic_base upon failure. 
Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Souptick Joarder (HPE) Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220218163303.33344-1-jrdr.linux@gmail.com --- drivers/irqchip/irq-nvic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/irqchip/irq-nvic.c b/drivers/irqchip/irq-nvic.c index ba4759b3e2693..94230306e0eee 100644 --- a/drivers/irqchip/irq-nvic.c +++ b/drivers/irqchip/irq-nvic.c @@ -107,6 +107,7 @@ static int __init nvic_of_init(struct device_node *node, if (!nvic_irq_domain) { pr_warn("Failed to allocate irq domain\n"); + iounmap(nvic_base); return -ENOMEM; } @@ -116,6 +117,7 @@ static int __init nvic_of_init(struct device_node *node, if (ret) { pr_warn("Failed to allocate irq chips\n"); irq_domain_remove(nvic_irq_domain); + iounmap(nvic_base); return ret; } -- GitLab From e7d90cfac5510f8c94baa18f9f3f7808859c8332 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 17 Feb 2022 13:49:50 +0100 Subject: [PATCH 0867/1586] PM: domains: Prevent power off for parent unless child is in deepest state A PM domain managed by genpd may support multiple idlestates (power-off states). During genpd_power_off() a genpd governor may be asked to select one of the idlestates based upon the dev PM QoS constraints, for example. However, there is a problem with the behaviour around this in genpd. More precisely, a parent-domain is allowed to be powered off, no matter what idlestate has been selected for the child-domain. For the stm32mp1 platform from STMicro, this behaviour doesn't play well. Instead, the parent-domain must not be powered off, unless the deepest idlestate has been selected for the child-domain. As the current behaviour in genpd is quite questionable anyway, let's simply change it into what is needed by the stm32mp1 platform. If it surprisingly turns out that other platforms may need a different behaviour from genpd, then we will have to revisit this to find a way to make it configurable. 
Signed-off-by: Ulf Hansson Reviewed-by: Dmitry Osipenko Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 5db704f02e712..c87588c217007 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -636,6 +636,18 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on, atomic_read(&genpd->sd_count) > 0) return -EBUSY; + /* + * The children must be in their deepest (powered-off) states to allow + * the parent to be powered off. Note that, there's no need for + * additional locking, as powering on a child, requires the parent's + * lock to be acquired first. + */ + list_for_each_entry(link, &genpd->parent_links, parent_node) { + struct generic_pm_domain *child = link->child; + if (child->state_idx < child->state_count - 1) + return -EBUSY; + } + list_for_each_entry(pdd, &genpd->dev_list, list_node) { enum pm_qos_flags_status stat; @@ -1073,6 +1085,13 @@ static void genpd_sync_power_off(struct generic_pm_domain *genpd, bool use_lock, || atomic_read(&genpd->sd_count) > 0) return; + /* Check that the children are in their deepest (powered-off) state. */ + list_for_each_entry(link, &genpd->parent_links, parent_node) { + struct generic_pm_domain *child = link->child; + if (child->state_idx < child->state_count - 1) + return; + } + /* Choose the deepest state when suspending */ genpd->state_idx = genpd->state_count - 1; if (_genpd_power_off(genpd, false)) -- GitLab From 9a6582b839281ee0e874621f1a2139d2aeb9489e Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Wed, 23 Feb 2022 09:03:23 +0100 Subject: [PATCH 0868/1586] PM: domains: use dev_err_probe() to simplify error handling dev_err_probe() can reduce code size, makes the code easier to read and has the added benefit of recording the defer reason for later read out. Use it where appropriate. 
This also fixes an issue, where an error message in __genpd_dev_pm_attach was not terminated by a line break. Signed-off-by: Ahmad Fatoum Signed-off-by: Sascha Hauer Acked-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index c87588c217007..c0d9ad01b32c8 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -2267,12 +2267,8 @@ int of_genpd_add_provider_simple(struct device_node *np, /* Parse genpd OPP table */ if (genpd->set_performance_state) { ret = dev_pm_opp_of_add_table(&genpd->dev); - if (ret) { - if (ret != -EPROBE_DEFER) - dev_err(&genpd->dev, "Failed to add OPP table: %d\n", - ret); - return ret; - } + if (ret) + return dev_err_probe(&genpd->dev, ret, "Failed to add OPP table\n"); /* * Save table for faster processing while setting performance @@ -2331,9 +2327,8 @@ int of_genpd_add_provider_onecell(struct device_node *np, if (genpd->set_performance_state) { ret = dev_pm_opp_of_add_table_indexed(&genpd->dev, i); if (ret) { - if (ret != -EPROBE_DEFER) - dev_err(&genpd->dev, "Failed to add OPP table for index %d: %d\n", - i, ret); + dev_err_probe(&genpd->dev, ret, + "Failed to add OPP table for index %d\n", i); goto error; } @@ -2691,12 +2686,8 @@ static int __genpd_dev_pm_attach(struct device *dev, struct device *base_dev, ret = genpd_add_device(pd, dev, base_dev); mutex_unlock(&gpd_list_lock); - if (ret < 0) { - if (ret != -EPROBE_DEFER) - dev_err(dev, "failed to add to PM domain %s: %d", - pd->name, ret); - return ret; - } + if (ret < 0) + return dev_err_probe(dev, ret, "failed to add to PM domain %s\n", pd->name); dev->pm_domain->detach = genpd_dev_pm_detach; dev->pm_domain->sync = genpd_dev_pm_sync; -- GitLab From f6bfe8b5b2c2a5ac8bd2fc7bca3706e6c3fc26d8 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Fri, 25 Feb 2022 14:48:15 +0800 Subject: [PATCH 
0869/1586] PM: domains: Fix sleep-in-atomic bug caused by genpd_debug_remove() When a genpd with GENPD_FLAG_IRQ_SAFE gets removed, the following sleep-in-atomic bug will be seen, as genpd_debug_remove() will be called with a spinlock being held. [ 0.029183] BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:1460 [ 0.029204] in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 1, name: swapper/0 [ 0.029219] preempt_count: 1, expected: 0 [ 0.029230] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 5.17.0-rc4+ #489 [ 0.029245] Hardware name: Thundercomm TurboX CM2290 (DT) [ 0.029256] Call trace: [ 0.029265] dump_backtrace.part.0+0xbc/0xd0 [ 0.029285] show_stack+0x3c/0xa0 [ 0.029298] dump_stack_lvl+0x7c/0xa0 [ 0.029311] dump_stack+0x18/0x34 [ 0.029323] __might_resched+0x10c/0x13c [ 0.029338] __might_sleep+0x4c/0x80 [ 0.029351] down_read+0x24/0xd0 [ 0.029363] lookup_one_len_unlocked+0x9c/0xcc [ 0.029379] lookup_positive_unlocked+0x10/0x50 [ 0.029392] debugfs_lookup+0x68/0xac [ 0.029406] genpd_remove.part.0+0x12c/0x1b4 [ 0.029419] of_genpd_remove_last+0xa8/0xd4 [ 0.029434] psci_cpuidle_domain_probe+0x174/0x53c [ 0.029449] platform_probe+0x68/0xe0 [ 0.029462] really_probe+0x190/0x430 [ 0.029473] __driver_probe_device+0x90/0x18c [ 0.029485] driver_probe_device+0x40/0xe0 [ 0.029497] __driver_attach+0xf4/0x1d0 [ 0.029508] bus_for_each_dev+0x70/0xd0 [ 0.029523] driver_attach+0x24/0x30 [ 0.029534] bus_add_driver+0x164/0x22c [ 0.029545] driver_register+0x78/0x130 [ 0.029556] __platform_driver_register+0x28/0x34 [ 0.029569] psci_idle_init_domains+0x1c/0x28 [ 0.029583] do_one_initcall+0x50/0x1b0 [ 0.029595] kernel_init_freeable+0x214/0x280 [ 0.029609] kernel_init+0x2c/0x13c [ 0.029622] ret_from_fork+0x10/0x20 It doesn't seem necessary to call genpd_debug_remove() with the lock, so move it out from locking to fix the problem. 
Fixes: 718072ceb211 ("PM: domains: create debugfs nodes when adding power domains") Signed-off-by: Shawn Guo Reviewed-by: Ulf Hansson Cc: 5.11+ # 5.11+ Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index c0d9ad01b32c8..1ee878d126fdf 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -2077,9 +2077,9 @@ static int genpd_remove(struct generic_pm_domain *genpd) kfree(link); } - genpd_debug_remove(genpd); list_del(&genpd->gpd_list_node); genpd_unlock(genpd); + genpd_debug_remove(genpd); cancel_work_sync(&genpd->power_off_work); if (genpd_is_cpu_domain(genpd)) free_cpumask_var(genpd->cpus); -- GitLab From 7dfe105dfc724c82ed3d79a4c47439c516a2410b Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Fri, 11 Feb 2022 08:10:27 -0800 Subject: [PATCH 0870/1586] PM: sleep: wakeup: Fix typos in comments Remove the second 'the'. Replace the second 'of' with 'the'. Replace 'couter' with 'counter'. Signed-off-by: Tom Rix Acked-by: Randy Dunlap Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- drivers/base/power/wakeirq.c | 2 +- drivers/base/power/wakeup.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c index 0004db4a9d3b9..d487a6bac630f 100644 --- a/drivers/base/power/wakeirq.c +++ b/drivers/base/power/wakeirq.c @@ -289,7 +289,7 @@ EXPORT_SYMBOL_GPL(dev_pm_disable_wake_irq); * * Enables wakeirq conditionally. We need to enable wake-up interrupt * lazily on the first rpm_suspend(). This is needed as the consumer device - * starts in RPM_SUSPENDED state, and the the first pm_runtime_get() would + * starts in RPM_SUSPENDED state, and the first pm_runtime_get() would * otherwise try to disable already disabled wakeirq. The wake-up interrupt * starts disabled with IRQ_NOAUTOEN set. 
* diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 8666590201c9a..a57d469676caa 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -587,7 +587,7 @@ static bool wakeup_source_not_registered(struct wakeup_source *ws) * @ws: Wakeup source to handle. * * Update the @ws' statistics and, if @ws has just been activated, notify the PM - * core of the event by incrementing the counter of of wakeup events being + * core of the event by incrementing the counter of the wakeup events being * processed. */ static void wakeup_source_activate(struct wakeup_source *ws) @@ -733,7 +733,7 @@ static void wakeup_source_deactivate(struct wakeup_source *ws) /* * Increment the counter of registered wakeup events and decrement the - * couter of wakeup events in progress simultaneously. + * counter of wakeup events in progress simultaneously. */ cec = atomic_add_return(MAX_IN_PROGRESS, &combined_event_count); trace_wakeup_source_deactivate(ws->name, cec); -- GitLab From 444e1154b2bf0b881b65ba1bba5bc8e691fac04a Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Tue, 15 Feb 2022 11:37:50 +0800 Subject: [PATCH 0871/1586] PM: hibernate: Clean up non-kernel-doc comments Address the following W=1 kernel build warning: kernel/power/swap.c:120: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Rafael J. 
Wysocki --- kernel/power/swap.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/power/swap.c b/kernel/power/swap.c index ad10359030a4c..c51f5507b34fc 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -89,7 +89,7 @@ struct swap_map_page_list { struct swap_map_page_list *next; }; -/** +/* * The swap_map_handle structure is used for handling swap in * a file-alike way */ @@ -117,7 +117,7 @@ struct swsusp_header { static struct swsusp_header *swsusp_header; -/** +/* * The following functions are used for tracing the allocated * swap pages, so that they can be freed in case of an error. */ @@ -171,7 +171,7 @@ static int swsusp_extents_insert(unsigned long swap_offset) return 0; } -/** +/* * alloc_swapdev_block - allocate a swap page and register that it has * been allocated, so that it can be freed in case of an error. */ @@ -190,7 +190,7 @@ sector_t alloc_swapdev_block(int swap) return 0; } -/** +/* * free_all_swap_pages - free swap pages allocated for saving image data. * It also frees the extents used to register which swap entries had been * allocated. -- GitLab From a644161ba11df38e5582e718c99668e282ddbf36 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 28 Feb 2022 11:57:59 -0800 Subject: [PATCH 0872/1586] Documentation: admin-guide: pm: Document uncore frequency scaling Added documentation to configure uncore frequency limits in Intel Xeon processors. Signed-off-by: Srinivas Pandruvada [ rjw: Clean up the document wording ] Signed-off-by: Rafael J. 
Wysocki --- .../pm/intel_uncore_frequency_scaling.rst | 60 +++++++++++++++++++ .../admin-guide/pm/working-state.rst | 1 + 2 files changed, 61 insertions(+) create mode 100644 Documentation/admin-guide/pm/intel_uncore_frequency_scaling.rst diff --git a/Documentation/admin-guide/pm/intel_uncore_frequency_scaling.rst b/Documentation/admin-guide/pm/intel_uncore_frequency_scaling.rst new file mode 100644 index 0000000000000..09169d9358356 --- /dev/null +++ b/Documentation/admin-guide/pm/intel_uncore_frequency_scaling.rst @@ -0,0 +1,60 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: <isonum.txt> + +============================== +Intel Uncore Frequency Scaling +============================== + +:Copyright: |copy| 2022 Intel Corporation + +:Author: Srinivas Pandruvada + +Introduction +------------ + +The uncore can consume a significant amount of power in Intel's Xeon servers based +on the workload characteristics. To optimize the total power and improve overall +performance, SoCs have internal algorithms for scaling uncore frequency. These +algorithms monitor workload usage of uncore and set a desirable frequency. + +It is possible that users have different expectations of uncore performance and +want to have control over it. The objective is similar to allowing users to set +the scaling min/max frequencies via cpufreq sysfs to improve CPU performance. +Users may have some latency sensitive workloads where they do not want any +change to uncore frequency. Also, users may have workloads which require +different core and uncore performance at distinct phases and they may want to +use both cpufreq and the uncore scaling interface to distribute power and +improve overall performance. + +Sysfs Interface +--------------- + +To control uncore frequency, a sysfs interface is provided in the directory: +`/sys/devices/system/cpu/intel_uncore_frequency/`. 
+ +There is one directory for each package and die combination as the scope of +uncore scaling control is per die in multiple die/package SoCs or per +package for single die per package SoCs. The name represents the +scope of control. For example: 'package_00_die_00' is for package id 0 and +die 0. + +Each package_*_die_* contains the following attributes: + +``initial_max_freq_khz`` + Out of reset, this attribute represents the maximum possible frequency. + This is a read-only attribute. If users adjust max_freq_khz, + they can always go back to maximum using the value from this attribute. + +``initial_min_freq_khz`` + Out of reset, this attribute represents the minimum possible frequency. + This is a read-only attribute. If users adjust min_freq_khz, + they can always go back to minimum using the value from this attribute. + +``max_freq_khz`` + This attribute is used to set the maximum uncore frequency. + +``min_freq_khz`` + This attribute is used to set the minimum uncore frequency. + +``current_freq_khz`` + This attribute is used to get the current uncore frequency. diff --git a/Documentation/admin-guide/pm/working-state.rst b/Documentation/admin-guide/pm/working-state.rst index 5d2757e2de656..ee45887811ffa 100644 --- a/Documentation/admin-guide/pm/working-state.rst +++ b/Documentation/admin-guide/pm/working-state.rst @@ -15,3 +15,4 @@ Working-State Power Management cpufreq_drivers intel_epb intel-speed-select + intel_uncore_frequency_scaling -- GitLab From ba7ffcd4c4da374b0f64666354eeeda7d3827131 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 28 Feb 2022 14:05:32 -0800 Subject: [PATCH 0873/1586] PM: hibernate: fix __setup handler error handling If an invalid value is used in "resumedelay=", it is silently ignored. Add a warning message and then let the __setup handler return 1 to indicate that the kernel command line option has been handled. 
Fixes: 317cf7e5e85e3 ("PM / hibernate: convert simple_strtoul to kstrtoul") Signed-off-by: Randy Dunlap Reported-by: Igor Zhbanov Link: lore.kernel.org/r/64644a2f-4a20-bab3-1e15-3b2cdd0defe3@omprussia.ru Signed-off-by: Rafael J. Wysocki --- kernel/power/hibernate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 49d1df0218cb8..0ac805b753e58 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -1330,7 +1330,7 @@ static int __init resumedelay_setup(char *str) int rc = kstrtouint(str, 0, &resume_delay); if (rc) - return rc; + pr_warn("resumedelay: bad option string '%s'\n", str); return 1; } -- GitLab From 7a64ca17e4dd50d5f910769167f3553902777844 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 28 Feb 2022 14:05:44 -0800 Subject: [PATCH 0874/1586] PM: suspend: fix return value of __setup handler If an invalid option is given for "test_suspend=